    x = Dense(32, activation='relu')(x)
    x = Dense(1, activation='linear')(x)
    critic = Model(inputs=(action_input, observation_input), outputs=x)
    print(critic.summary())

    # Define a memory buffer for the agent so it can learn from past experiences
    memory = SequentialMemory(
        limit=10000,
        window_length=window_length
    )

    # Create a random process for exploration during training
    # this is essential for the DDPG algorithm
    random_process = OrnsteinUhlenbeckProcess(
        theta=0.5,
        mu=0.0,
        sigma=0.2
    )

    # Create the agent for DDPG learning
    agent = DDPGAgent(
        # Pass the previously defined characteristics
        nb_actions=nb_actions,
        actor=actor,
        critic=critic,
        critic_action_input=action_input,
        memory=memory,
        random_process=random_process,

        # Define the overall training parameters
        nb_steps_warmup_actor=2048,
Example #2
def main(train_test_flag='train'):
    get_custom_objects().update(
        {'SmoothLogistic': Activation(smooth_logistic)})
    model_name = '2,2,3x32Net_r4_lr{}_th{}_[t{}s{}]_nAnn[{},{}]_{}'. \
        format(LR,
               SUCCESS_THRESHOLD,
               THETA,
               SIGMA,
               SIGMA_MIN,
               NUM_STEPS_ANNEALING,
               NUM_MUSCLES)
    muscle_labels = ["m" + str(i) for i in np.array(range(NUM_MUSCLES))]

    training = False
    weight_filename = os.path.join(c.trained_directory,
                                   '{}_weights.h5f'.format(model_name))
    log_file_name = begin_time + '_' + model_name

    while True:
        try:
            env = PointModel2dEnv(verbose=0,
                                  success_thres=SUCCESS_THRESHOLD,
                                  dof_observation=DOF_OBSERVATIONS,
                                  include_follow=False,
                                  port=PORT,
                                  muscle_labels=muscle_labels,
                                  log_file=log_file_name)
            break
        except ConnectionRefusedError as e:
            print("Server not started: ", e)
            time.sleep(10)
    try:
        env.seed(123)
        nb_actions = env.action_space.shape[0]
        memory = SequentialMemory(limit=MEMORY_SIZE, window_length=1)

        mu_model = get_mu_model(env)
        v_model = get_v_model(env)
        l_model = get_l_model(env)

        random_process = OrnsteinUhlenbeckProcess(
            size=nb_actions,
            theta=THETA,
            mu=MU,
            sigma=SIGMA,
            dt=DT,
            sigma_min=SIGMA_MIN,
            n_steps_annealing=NUM_STEPS_ANNEALING)
        # random_process = None
        processor = PointModel2dProcessor()
        agent = MuscleNAFAgent(nb_actions=nb_actions,
                               V_model=v_model,
                               L_model=l_model,
                               mu_model=mu_model,
                               memory=memory,
                               nb_steps_warmup=WARMUP_STEPS,
                               random_process=random_process,
                               gamma=GAMMA,
                               target_model_update=UPDATE_TARGET_MODEL_STEPS,
                               processor=processor,
                               target_episode_update=True)

        agent.compile(Adam(lr=LR), metrics=['mse'])
        env.agent = agent
        pprint.pprint(agent.get_config(False))
        load_weights(agent, weight_filename)

        tensorboard = RlTensorBoard(log_dir=os.path.join(
            c.tensorboard_log_directory, log_file_name),
                                    histogram_freq=HISTOGRAM_FREQ,
                                    batch_size=BATCH_SIZE,
                                    write_graph=True,
                                    write_grads=True,
                                    write_images=False,
                                    embeddings_freq=0,
                                    embeddings_layer_names=None,
                                    embeddings_metadata=None,
                                    agent=agent)
        csv_logger = keras.callbacks.CSVLogger(os.path.join(
            c.agent_log_directory, log_file_name),
                                               append=False,
                                               separator=',')

        if train_test_flag == 'train':
            # train code
            training = True
            agent.fit(env,
                      nb_steps=NUM_TRAINING_STEPS,
                      visualize=False,
                      verbose=VERBOSITY,
                      nb_max_episode_steps=NUM_MAX_EPISODE_STEPS,
                      callbacks=[tensorboard, csv_logger])
            print('Training complete')
            save_weights(agent, weight_filename)
        elif train_test_flag == 'test':
            # test code
            training = False
            env.log_to_file = False
            history = agent.test(env,
                                 nb_episodes=NUM_EPISODES,
                                 nb_max_episode_steps=NUM_MAX_EPISODE_STEPS)
            print(history.history)
            print('Average last distance: ',
                  np.mean(history.history['last_distance']))
            print('Mean Reward: ', np.mean(history.history['episode_reward']))

    except Exception as e:
        if training:
            save_weights(agent, weight_filename)
        print("Error in main code:", str(e))
        env.net.sock.close()
        raise e
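
The helper constructors get_mu_model, get_v_model and get_l_model above are project-specific and not shown on this page. As rough orientation, a NAF-style agent in keras-rl expects three heads: V(s) with a single linear output, mu(s) with one output per action, and L(s) with nb_actions*(nb_actions+1)/2 outputs for the lower-triangular advantage matrix. The sketch below is a hypothetical stand-in following keras-rl's NAF example conventions; build_naf_models and its arguments are made up, not the author's code.

# Hypothetical sketch of the three networks a keras-rl NAF-style agent expects.
from keras.layers import Concatenate, Dense, Flatten, Input
from keras.models import Model, Sequential

def build_naf_models(observation_shape, nb_actions):
    # V(s): scalar state value
    v_model = Sequential([
        Flatten(input_shape=(1,) + observation_shape),
        Dense(32, activation='relu'),
        Dense(1, activation='linear'),
    ])
    # mu(s): one output per action (the greedy action)
    mu_model = Sequential([
        Flatten(input_shape=(1,) + observation_shape),
        Dense(32, activation='relu'),
        Dense(nb_actions, activation='linear'),
    ])
    # L(s): entries of a lower-triangular matrix used to form the advantage term
    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + observation_shape, name='observation_input')
    x = Concatenate()([action_input, Flatten()(observation_input)])
    x = Dense(32, activation='relu')(x)
    x = Dense((nb_actions * nb_actions + nb_actions) // 2, activation='linear')(x)
    l_model = Model(inputs=[action_input, observation_input], outputs=x)
    return mu_model, v_model, l_model
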
flattened_observation = Flatten()(observation_input)
x = concatenate([action_input, flattened_observation])
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Set up the agent for training
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                          mu=0.,
                                          sigma=.2,
                                          size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=100,
                  nb_steps_warmup_actor=100,
                  random_process=random_process,
                  gamma=.99,
                  target_model_update=1e-3,
                  delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
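
The fragment above stops right after the DDPGAgent is constructed (the commented-out ContinuousDQNAgent is an alternative the author left in). A typical continuation, following the same keras-rl pattern used elsewhere on this page, is sketched below; the step counts and the weights filename are placeholders, not values from the original script.

# Sketch of the usual next steps for a keras-rl DDPGAgent (placeholder values).
from keras.optimizers import Adam

agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
agent.fit(env, nb_steps=100000, visualize=False, verbose=1, nb_max_episode_steps=200)
agent.save_weights('ddpg_weights.h5f', overwrite=True)
agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=200)
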
Example #4
def main(args):
    sigma, learning_rate, file_prefix = args

    env = ModifiedArmEnv(visualize=False)
    input_shape = (1, ) + env.observation_space.shape
    nb_actions = env.action_space.shape[0]

    # Create actor and critic networks
    actor = Sequential()
    actor.add(Flatten(input_shape=input_shape))
    actor.add(Dense(32))
    actor.add(Activation('relu'))
    actor.add(Dense(32))
    actor.add(Activation('relu'))
    actor.add(Dense(32))
    actor.add(Activation('relu'))
    actor.add(Dense(nb_actions))
    actor.add(Activation('sigmoid'))

    action_input = Input(shape=(nb_actions, ), name='action_input')
    observation_input = Input(shape=input_shape, name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = concatenate([action_input, flattened_observation])
    x = Dense(64)(x)
    x = Activation('relu')(x)
    x = Dense(64)(x)
    x = Activation('relu')(x)
    x = Dense(64)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)

    # Set up the agent for training
    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                              mu=0.,
                                              sigma=sigma,
                                              dt=env.stepsize,
                                              size=env.noutput)
    agent = DDPGAgent(
        nb_actions=nb_actions,
        actor=actor,
        critic=critic,
        critic_action_input=action_input,
        memory=memory,
        nb_steps_warmup_critic=100,
        nb_steps_warmup_actor=100,
        random_process=random_process,
        gamma=.99,
        target_model_update=1e-3,
        delta_clip=1.,
    )
    agent.compile(Adam(lr=learning_rate, clipnorm=1.), metrics=['mae'])

    # Train the model
    training_history = RewardsLogger()
    env.reset()
    agent.fit(
        env,
        nb_steps=100000,
        visualize=False,
        verbose=1,
        nb_max_episode_steps=200,
        log_interval=10000,
        callbacks=[training_history],
    )

    # Save weights and training history
    agent.save_weights(file_prefix + '_weights.h5f', overwrite=True)
    pickledump(training_history, file_prefix + '_training_history.pkl')

    # Set test parameters
    test_nb_episodes = 10
    test_nb_max_episode_steps = 1000

    # Run test
    test_history = ObservationsLogger()
    env.reset()
    agent.test(
        env,
        nb_episodes=test_nb_episodes,
        visualize=False,
        nb_max_episode_steps=test_nb_max_episode_steps,
        callbacks=[test_history],
    )
    # Save test history
    pickledump(test_history, file_prefix + '_test_history.pkl')
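
RewardsLogger and ObservationsLogger above are custom callbacks whose definitions are not included on this page. A hypothetical minimal version of RewardsLogger, using keras-rl's callback hooks, could look like the following; the class body is a guess, not the original implementation.

# Hypothetical stand-in for the RewardsLogger callback referenced above.
from rl.callbacks import Callback

class RewardsLogger(Callback):
    def __init__(self):
        super(RewardsLogger, self).__init__()
        self.episode_rewards = []

    def on_episode_end(self, episode, logs={}):
        # keras-rl passes the accumulated reward of the finished episode in logs
        self.episode_rewards.append(logs.get('episode_reward'))
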
h3 = Activation('relu')(h3)

h4 = Dense(HYP.HIDDEN_UNITS_2, name='Q_h4')(h3)
h4 = Dropout(HYP.DROPOUT)(h4)
h4 = Activation('relu')(h4)

Qvalues = Dense(1, activation='linear', name='Q_last')(h4)

critic = Model(inputs=[action_input, observation_input], outputs=Qvalues)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=HYP.MEMORY, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                          theta=HYP.THETA,
                                          mu=HYP.MU,
                                          sigma=HYP.SIGMA)
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  batch_size=HYP.BATCH_SIZE,
                  nb_steps_warmup_actor=HYP.WARMUP_ACTOR,
                  nb_steps_warmup_critic=HYP.WARMUP_CRITIC,
                  random_process=random_process,
                  gamma=HYP.GAMMA,
                  target_model_update=HYP.TAU)
agent.compile(Adam(lr=HYP.LEARN_R, clipnorm=HYP.CLIPNORM), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for
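
The HYP object referenced throughout this example is a hyperparameter module that is not shown here. A hypothetical stand-in exposing just the fields the snippet uses could be as simple as the following; all values are illustrative guesses, not the author's settings.

# Hypothetical hyperparameter container for the HYP names used above.
from types import SimpleNamespace

HYP = SimpleNamespace(
    HIDDEN_UNITS_2=64,                 # width of the Q_h4 layer
    DROPOUT=0.2,
    MEMORY=100000,                     # replay buffer size
    THETA=0.15, MU=0.0, SIGMA=0.3,     # Ornstein-Uhlenbeck noise parameters
    BATCH_SIZE=64,
    WARMUP_ACTOR=100, WARMUP_CRITIC=100,
    GAMMA=0.99,
    TAU=1e-3,                          # soft target-model update rate
    LEARN_R=1e-3, CLIPNORM=1.0,
)
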
Example #6
    def __init__(self):
        ENV_NAME = 'drone'
        # Get the environment and extract the number of actions.
        #env = gym.make(ENV_NAME)
        env = drone_sim()
        np.random.seed(123)
        env.seed(123)
        assert len(env.action_space.shape) == 1
        nb_actions = env.action_space.shape[0]

        # Next, we build a very simple model.
        self.actor = Sequential()
        self.actor.add(Flatten(input_shape=(1, ) +
                               env.observation_space.shape))
        self.actor.add(Dense(16))
        self.actor.add(Activation('relu'))
        self.actor.add(Dense(16))
        self.actor.add(Activation('relu'))
        self.actor.add(Dense(16))
        self.actor.add(Activation('relu'))
        self.actor.add(
            Dense(nb_actions,
                  activation='tanh',
                  kernel_initializer=RandomUniform()))
        self.actor.add(Lambda(lambda x: x * 60.0))
        print(self.actor.summary())

        action_input = Input(shape=(nb_actions, ), name='action_input')
        observation_input = Input(shape=(1, ) + env.observation_space.shape,
                                  name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = Concatenate()([action_input, flattened_observation])
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(1)(x)
        x = Activation('linear')(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)
        print(critic.summary())

        # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
        # even the metrics!
        memory = SequentialMemory(limit=100000, window_length=1)
        random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                                  theta=.15,
                                                  mu=0.,
                                                  sigma=.3)
        self.agent = DDPGAgent(nb_actions=nb_actions,
                               actor=self.actor,
                               critic=critic,
                               critic_action_input=action_input,
                               memory=memory,
                               nb_steps_warmup_critic=100,
                               nb_steps_warmup_actor=100,
                               random_process=random_process,
                               gamma=.99,
                               target_model_update=1e-3)
        self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
Example #7
print(critic.summary())

filename_exp = 'exp_0'
log_filename_pre = '../results/T2D1/'
process_noise_std = 0*20
theta=0.15

GAMMA = 1                   # discount factor of the cumulative reward function
STEPS_PER_EPISODE = 400     # No. of time-steps per episode

# configure and compile our agent by using built-in Keras optimizers and the metrics!
# allocate the memory by specifying the maximum no. of samples to store
memory = SequentialMemory(limit=800000, window_length=1)
# random process for exploration noise
#random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, dt=0.01, mu=0., sigma=.2)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=theta, dt=0.01, mu=0., sigma=.35, sigma_min=0.01)

# define the DDPG agent
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=GAMMA, target_model_update=5e-4)
# compile the model
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mse'])

callbacks = common_func.build_callbacks(ENV_NAME, log_filename_pre, filename_exp)
# ----------------------------------------------------------------------------------------------------------------------------------------
# Training phase
# fit the agent; after training is done, we save the final weights.

# agent.fit(env, nb_steps=600000, visualize=False, callbacks=callbacks, verbose=1, gamma=GAMMA, nb_max_episode_steps=STEPS_PER_EPISODE, process_noise_std=process_noise_std)
# agent.save_weights(log_filename_pre+filename_exp+'/ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
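
As a quick standalone sanity check (not part of the original script), the exploration noise configured above can be sampled on its own to see how large the perturbations are before wiring it into the agent:

# Standalone check of the Ornstein-Uhlenbeck exploration noise used above.
import numpy as np
from rl.random import OrnsteinUhlenbeckProcess

proc = OrnsteinUhlenbeckProcess(size=1, theta=0.15, dt=0.01, mu=0., sigma=.35, sigma_min=0.01)
noise = np.array([proc.sample() for _ in range(400)])   # one 400-step episode
print(noise.mean(), noise.std(), noise.min(), noise.max())
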
Example #8
NB_STEPS = 50000
PRE_WARM_STEP = 0
SAVE_INTERVAL = 200
naive_env = env.unwrapped
step_length = naive_env.simulation.step_length / 1000
plan_horizon = naive_env.horizon
goal_length = naive_env.goal_length
NB_MAX_EPISODE_STEPS = goal_length / (
    (step_length * plan_horizon) * 5
)  # episode length / (times per action * min v)

# turn left agent
left_processor = WhiteningNormalizerProcessor()
left_memory = SequentialMemory(limit=MEMORY_LIMIT, window_length=WINDOW_LENGTH)
left_random_process = OrnsteinUhlenbeckProcess(size=lower_nb_actions,
                                               theta=RANDOM_PROCESS_THETA,
                                               mu=RANDOM_PROCESS_MU,
                                               sigma=RANDOM_PROCESS_SIGMA)
left_agent = DDPGAgent(processor=left_processor,
                       nb_actions=lower_nb_actions,
                       actor=left_actor_model,
                       critic=left_critic_model,
                       critic_action_input=critic_action_input,
                       memory=left_memory,
                       nb_steps_warmup_critic=NB_STEPS_WARMUP_CRITIC,
                       nb_steps_warmup_actor=NB_STEPS_WARMUP_ACTOR,
                       random_process=left_random_process,
                       gamma=GAMMA,
                       target_model_update=TARGET_MODEL_UPDATE,
                       batch_size=BATCH_SIZE_LOWER)
left_agent.compile(Adam(lr=OPTIMIZER_LR, clipnorm=OPTIMIZER_CLIPNORM),
                   metrics=['mae'])
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(32, activation='relu')(x)
    x = Dense(32, activation='relu')(x)
    x = Dense(32, activation='relu')(x)
    x = Dense(1, activation='linear')(x)
    critic = Model(inputs=(action_input, observation_input), outputs=x)
    print(critic.summary())

    # Create a replay memory
    memory = SequentialMemory(limit=50000, window_length=window_length)

    # Create a random process for exploration during training
    random_process = OrnsteinUhlenbeckProcess(theta=0.5,
                                              mu=0.0,
                                              sigma=0.2,
                                              sigma_min=0.02,
                                              n_steps_annealing=150000)

    # Create the agent
    agent = DDPGAgent(nb_actions=nb_actions,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      random_process=random_process,
                      nb_steps_warmup_actor=1024,
                      nb_steps_warmup_critic=1024,
                      target_model_update=1000,
                      gamma=0.9,
                      batch_size=128,
Example #10
filename_exp='exp_s/exp_0'
log_filename_pre = '../results/Pendulum/'
process_noise_std = 0.5*5.8 # no ref
theta=0.15
sigma=6

GAMMA = 1                   # discount factor of the cumulative reward function
STEPS_PER_EPISODE = 30      # No. of time-steps per episode

# configure and compile our agent by using built-in Keras optimizers and the metrics!

# allocate the memory by specifying the maximum no. of samples to store
memory = SequentialMemory(limit=300000, window_length=1)
# random process for exploration noise
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=theta, mu=0., dt=0.01, sigma=sigma)
# define the DDPG agent
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=GAMMA, target_model_update=1e-3)
# compile the model as follows
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mse'])

callbacks = common_func.build_callbacks(ENV_NAME, log_filename_pre, filename_exp)
# ----------------------------------------------------------------------------------------------------------------------------------------
# Training phase
# fitting the agent. After training is done, save the final weights.
# 240000

# agent.fit(env, nb_steps=300000, visualize=False, callbacks=callbacks, verbose=1, nb_max_episode_steps=STEPS_PER_EPISODE, process_noise_std=process_noise_std)
# agent.save_weights(log_filename_pre+filename_exp+'/ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
Example #11
x = merge([action_input, flattened_observation], mode='concat')
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(input=[action_input, observation_input], output=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000)
random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=100,
                  nb_steps_warmup_actor=100,
                  random_process=random_process,
                  gamma=.99,
                  target_model_update=1e-3,
                  delta_range=(-10., 10.))
agent.compile([RMSprop(lr=.001), RMSprop(lr=.001)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
Example #12
    def __init__(self, env, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, batch_size=32, lr=.01, clipnorm=1.,
                 gamma=.99, target_model_update=1e-2, theta=.15, mu=0., sigma=.3, name=None):

        memory_len = 1

        self.env = env
        self.name = name

        action_input = Input(shape=(self.env.action_space.shape[0],), name='action_input')
        observation_input = Input(shape=(memory_len, self.env.obs_steps, len(self.env.observation_space)),
                                  name='observation_input')
        processed_observation = Lambda(lambda x: K.squeeze(x, axis=1), name='processed_observation')(observation_input)
        #
        # def process_obs(obs):
        #     obs =  K.squeeze(obs, axis=1)
        #     obs = K.(obs[:,:,1], obs[:,:,2])



        ## Shared layers
        # self.shared_convs = [Conv1D(32, kernel_size=1, padding='same', activation='relu')] * 2 + \
        # [Conv1D(32, kernel_size=6, padding='same', activation='relu')] * 3
        #
        # self.shared_grus = [GRU(32, activation='relu', return_sequences=True)] * 3 +\
        # [GRU(32, activation='relu', return_sequences=False)]


        ## Shared vision and sequence models
        # Vision model
        c = BatchNormalization()(processed_observation)
        c1 = Conv1D(8, kernel_size=1, padding='same', activation='selu')(c)
        # c1 = BatchNormalization()(c1)
        c1 = Conv1D(16, kernel_size=3, padding='same', activation='selu')(c1)
        c1 = MaxPool1D(strides=1, padding='same')(c1)

        c2 = Conv1D(8, kernel_size=1, padding='same', activation='selu')(c)
        # c2 = BatchNormalization()(c2)
        c2 = Conv1D(16, kernel_size=6, padding='same', activation='selu')(c2)

        c3 = MaxPool1D(strides=1, padding='same')(c2)
        c3 = Conv1D(8, kernel_size=6, padding='same', activation='selu')(c3)

        c4 = Concatenate(axis=-1)([c1, c2, c3])

        # c5 = BatchNormalization()(c4)
        c5 = Conv1D(8, kernel_size=12, padding='same', activation='selu')(c4)
        # c5 = BatchNormalization()(c5)
        c5 = MaxPool1D(strides=1, padding='same')(c5)
        c5 = Conv1D(16, kernel_size=24, padding='same', activation='selu')(c5)
        c5 = MaxPool1D(strides=1, padding='same')(c5)

        c6 = Concatenate(axis=-1)([c1, c2, c3, c4, c5])

        # Sequence model
        # b = BatchNormalization()(c6)
        r = GRU(16, activation='selu', return_sequences=True)(c6)
        # b2 = BatchNormalization()(b1)
        r = GRU(16, activation='selu', return_sequences=True)(r)
        # b3 = BatchNormalization()(b2)
        r = GRU(16, activation='selu', return_sequences=True)(r)
        # b4 = BatchNormalization()(b3)
        r = GRU(16, activation='selu', return_sequences=False)(r)

        # Shape conforming
        f = Flatten()(c6)
        k = Concatenate(axis=-1)([r, f])

        # Actor voting system
        # a = BatchNormalization()(k)
        a = Dense(512, activation='selu')(k)
        # a = Dropout(0.2)(a)
        a = Concatenate(axis=-1)([a, k])
        # a = BatchNormalization()(a)
        a = Dense(256, activation='selu')(a)
        # a = Dropout(0.2)(a)
        a = Concatenate(axis=-1)([a, k])
        # a = BatchNormalization()(a)
        a = Dense(128, activation='selu')(a)
        # a = Dropout(0.2)(a)
        a = Concatenate(axis=-1)([a, k])
        # a = BatchNormalization()(a)
        a = Dense(64, activation='selu')(a)
        # a = Dropout(0.2)(a)
        # a = BatchNormalization()(a)
        actor_out = Dense(self.env.action_space.shape[0], activation='sigmoid')(a)

        # Critic value estimator
        d = Concatenate(axis=-1)([action_input, k])
        # d = BatchNormalization()(d)
        d = Dense(512, activation='selu')(d)
        # d = Dropout(0.2)(d)
        d = Concatenate(axis=-1)([d, k])
        # d = BatchNormalization()(d)
        d = Dense(256, activation='selu')(d)
        # d = Dropout(0.2)(d)
        # d = BatchNormalization()(d)
        d = Concatenate(axis=-1)([d, k])
        d = Dense(128, activation='selu')(d)
        # d = Dropout(0.2)(d)
        # d = BatchNormalization()(d)
        d = Concatenate(axis=-1)([d, k])
        d = Dense(64, activation='selu')(d)
        # d = Dropout(0.2)(d)
        # d = BatchNormalization()(d)
        critic_out = Dense(self.env.action_space.shape[0], activation='sigmoid')(d)

        # Define and compile models
        self.actor = Model(inputs=observation_input, outputs=actor_out)
        self.critic = Model(inputs=[action_input, observation_input], outputs=critic_out)

        self.memory = SequentialMemory(limit=10000, window_length=memory_len)
        random_process = OrnsteinUhlenbeckProcess(size=self.env.action_space.shape[0], theta=theta, mu=mu, sigma=sigma)
        super().__init__(nb_actions=self.env.action_space.shape[0], actor=self.actor, batch_size=batch_size,
                         critic=self.critic, critic_action_input=action_input, memory=self.memory,
                         nb_steps_warmup_critic=nb_steps_warmup_critic,
                         nb_steps_warmup_actor=nb_steps_warmup_actor, random_process=random_process,
                         gamma=gamma, target_model_update=target_model_update)

        self.compile(Nadam(lr=lr, clipnorm=clipnorm), metrics=['mae'])
Example #13
x = concatenate([action_input, flattened_observation])
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Set up the agent for training
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                          mu=0.,
                                          sigma=.2,
                                          size=env.get_action_space_size())
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=100,
                  nb_steps_warmup_actor=100,
                  random_process=random_process,
                  gamma=.99,
                  target_model_update=1e-3,
                  delta_clip=1.)
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
Example #14
    def __init__(self, env, *args, **kwargs):
        super(KerasDDPGAgent, self).__init__(*args, **kwargs)
        self.env = env

        #assert len(env.action_space.shape) == 1
        #TODO: is there a way to output a tuple (6,1)
        nb_actions = sum(
            sum(1 for i in row if i) for row in self.env.action_space.sample())

        #TODO: terminology? feature or observation?
        observation = env.reset()

        print ">>>>>>>>>>>>>>>>>>>", observation.shape

        # TODO: find a way to customize network
        actor = Sequential()
        actor.add(Flatten(input_shape=(1, ) + observation.shape))
        actor.add(Dense(16))
        actor.add(Activation('relu'))
        actor.add(Dense(16))
        actor.add(Activation('relu'))
        actor.add(Dense(16))
        actor.add(Activation('relu'))
        actor.add(Dense(nb_actions))
        actor.add(Activation('tanh'))
        actor.add(Lambda(lambda x: x * 3.14159))

        print(actor.summary())

        action_input = Input(shape=(nb_actions, ), name='action_input')

        observation_input = Input(shape=(1, ) + observation.shape,
                                  name='observation_input')
        flattened_observation = Flatten()(observation_input)
        x = merge([action_input, flattened_observation], mode='concat')
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(1)(x)
        x = Activation('linear')(x)
        critic = Model(input=[action_input, observation_input], output=x)
        print(critic.summary())

        memory = SequentialMemory(limit=500000, window_length=1)
        random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                                  theta=.15,
                                                  mu=0.,
                                                  sigma=.3)
        self.agent = DDPGAgent(nb_actions=nb_actions,
                               actor=actor,
                               critic=critic,
                               critic_action_input=action_input,
                               memory=memory,
                               nb_steps_warmup_critic=1000,
                               nb_steps_warmup_actor=1000,
                               random_process=random_process,
                               gamma=.99,
                               target_model_update=1e-3)
        self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
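
One detail worth noting in the snippet above: nb_actions is derived by counting the truthy entries of a single sampled action, so any entries that happen to be zero in that particular sample are silently dropped. A standalone illustration with made-up values:

# Illustration of the nb_actions computation above (sample values are made up).
sample = [[0.4, 0.0, 1.2], [0.7, 0.0, 2.1]]   # hypothetical action_space.sample()
nb_actions = sum(sum(1 for i in row if i) for row in sample)
print(nb_actions)   # -> 4, because the two zero entries are skipped by `if i`
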
    x = Activation('relu')(x)

# Output Layer
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(input=[action_input, observation_input], output=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=2 * NUM_STEPS, window_length=1)
# random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                          dt=env.tau,
                                          theta=0.6,
                                          mu=0.0,
                                          sigma=0.5,
                                          sigma_min=0.15,
                                          n_steps_annealing=NUM_STEPS)

agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=100,
                  nb_steps_warmup_actor=100,
                  random_process=random_process,
                  gamma=.999,
                  target_model_update=1e-3,
                  delta_clip=1.0)
Example #16
def main():
    history, stock_list, marketdate, item_list = read_stock_history()
    history = history[:, :, :4]
    target_stocks = stock_list
    num_training_time = 2000
    window_length = 50
    nb_actions = len(target_stocks) + 1

    # action_dim = [nb_actions]
    # state_dim = [window_length, nb_actions]
    # batch_size = 64
    # action_bound = 1.
    # tau = 1e-3
    # learning_rate = 1e-4
    #
    # predictor_type = 'cnn'
    # use_batch_norm = True
    #
    # CONFIG = {'seed': 1234,
    #           'episode': 1,
    #           'batch_size': 256,
    #           'gamma': 0.99,
    #           'buffer_size': 100,
    #           'max_step': 500,
    #           'tau': 0.001
    #           }

    # get target history
    target_history = np.empty(shape=(num_training_time, len(target_stocks),
                                     history.shape[2]))
    target_marketdate = marketdate[:num_training_time]

    for i, stock in enumerate(target_stocks):
        target_history[:, i, :] = history[:num_training_time,
                                          stock_list.index(stock), :]

    env = PortfolioEnv(target_history,
                       target_stocks,
                       target_marketdate,
                       steps=252,
                       window_len=window_length)

    np.random.seed(123)
    env.seed(123)

    action_input = Input(shape=(nb_actions, ), name='action_input')
    observation_input = Input(shape=(1, window_length, nb_actions, 1),
                              name='observation_input')
    reshaped_obs_input = Reshape(
        (window_length, nb_actions, 1))(observation_input)
    x = Conv2D(32, kernel_size=(3, 1))(reshaped_obs_input)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(32, kernel_size=(1, 1))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Flatten()(x)
    x = Dense(64, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    w_init = keras.initializers.RandomUniform(minval=-0.003,
                                              maxval=0.003,
                                              seed=None)
    x = Dense(nb_actions, activation='softmax', kernel_initializer=w_init)(x)
    # x = NALU(nb_actions)(x)
    actor = Model(inputs=observation_input, outputs=x)

    # actor = Sequential()
    # actor.add(Conv2D(32, kernel_size=(1, 3), input_shape=state_dim + [1]))
    # actor.add(BatchNormalization())
    # actor.add(Activation('relu'))
    # actor.add(Conv2D(32, kernel_size=(1, 1)))
    # actor.add(BatchNormalization())
    # actor.add(Activation('relu'))
    # actor.add(Flatten())
    # actor.add(Dense(64, activation='relu'))
    # actor.add(Dense(64, activation='relu'))
    # w_init = keras.initializers.RandomUniform(minval=-0.05, maxval=0.05, seed=None)
    # actor.add(Dense(nb_actions, activation='softmax', kernel_initializer=w_init))
    print(actor.summary())

    x = Conv2D(32, kernel_size=(3, 1))(reshaped_obs_input)
    x = BatchNormalization()(x)
    l1 = Activation('relu')(x)
    x = Conv2D(32, kernel_size=(1, 1))(l1)
    x = BatchNormalization()(x)
    l2 = Activation('relu')(x)
    flattened_observation = Flatten()(l2)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(64)(x)
    l3 = Activation('relu')(x)
    x = Dense(64)(l3)
    l4 = Activation('relu')(x)
    w_init = keras.initializers.RandomUniform(minval=-0.003,
                                              maxval=0.003,
                                              seed=None)
    x = Dense(1, activation='linear', kernel_initializer=w_init)(l4)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    print(critic.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                              theta=.15,
                                              mu=0.,
                                              sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      nb_steps_warmup_critic=100,
                      nb_steps_warmup_actor=100,
                      random_process=random_process,
                      gamma=.99,
                      target_model_update=1e-3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    agent_history = agent.fit(env,
                              nb_steps=1000000,
                              visualize=False,
                              verbose=1,
                              nb_max_episode_steps=252)

    # After training is done, we save the final weights.
    agent.save_weights('ddpg_{}_weights.h5f'.format('RL_ENV2_TEST'),
                       overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=252)
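
Since the weights are stored under 'ddpg_RL_ENV2_TEST_weights.h5f', a later run can skip training and only evaluate the saved policy. A minimal sketch, assuming the same actor, critic and agent construction as above has already been executed:

# Sketch: reload the weights saved above and evaluate without retraining.
agent.load_weights('ddpg_{}_weights.h5f'.format('RL_ENV2_TEST'))
agent.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=252)
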
Example #17
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(64)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(input=[action_input, observation_input], output=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(theta=float(args.theta),
                                          mu=0.,
                                          sigma=float(args.sigma),
                                          size=env.noutput)
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=100,
                  nb_steps_warmup_actor=100,
                  random_process=random_process,
                  gamma=.99,
                  target_model_update=1e-3,
                  delta_range=(-100., 100.))
# agent = ContinuousDQNAgent(nb_actions=env.noutput, V_model=V_model, L_model=L_model, mu_model=mu_model,
#                            memory=memory, nb_steps_warmup=1000, random_process=random_process,
#                            gamma=.99, target_model_update=0.1)
Example #18
def main2(mode='train',
          tc='plus',
          load_model=True,
          train_visualize=False,
          train_steps=50,
          n_episodes=10):

    history, factor_id, marketdate = factor_history()
    target_assets = factor_id
    window_length = 50
    mem_size = 100
    steps = 252
    nb_actions = len(target_assets)

    # get target history
    import copy
    target_history = copy.deepcopy(history)
    target_marketdate = copy.deepcopy(marketdate)

    if tc == 'plus':
        trading_cost = 0.001
    elif tc == 'zero':
        trading_cost = 0.00
    else:
        trading_cost = -0.01

    env = PortfolioEnv(target_history,
                       target_assets,
                       target_marketdate,
                       steps=steps,
                       window_length=window_length,
                       trading_cost=trading_cost)

    action_input = Input(shape=(nb_actions, ), name='action_input')
    observation_input = Input(shape=(1, window_length, nb_actions, 1),
                              name='observation_input')
    reshaped_obs_input = Reshape(
        (window_length, nb_actions, 1))(observation_input)
    x = Conv2D(10, kernel_size=(30, 1))(reshaped_obs_input)
    x = BatchNormalization()(x)
    x = Activation('linear', name='actor_layer_1')(x)
    x = LeakyReLU()(x)
    x = Conv2D(10, kernel_size=(1, 1))(x)
    x = BatchNormalization()(x)
    x = Activation('linear', name='actor_layer_2')(x)
    x = LeakyReLU()(x)
    flattened_observation = Flatten()(x)
    x = Dense(64, activation='linear')(flattened_observation)
    x = LeakyReLU()(x)
    x = Dense(32, activation='linear')(x)
    x = LeakyReLU(name='actor_layer_3')(x)
    w_init = keras.initializers.RandomUniform(minval=-0.003,
                                              maxval=0.003,
                                              seed=None)
    x = Dense(nb_actions, activation='softmax', kernel_initializer=w_init)(x)
    actor = Model(inputs=observation_input, outputs=x)
    actor_intermediate_1 = Model(
        inputs=actor.inputs, outputs=actor.get_layer('actor_layer_1').output)
    actor_intermediate_2 = Model(
        inputs=actor.inputs, outputs=actor.get_layer('actor_layer_2').output)
    actor_intermediate_3 = Model(
        inputs=actor.inputs, outputs=actor.get_layer('actor_layer_3').output)
    print(actor.summary())

    # x = Conv2D(32, kernel_size=(50, 1))(reshaped_obs_input)
    # x = BatchNormalization()(x)
    # x = Activation('relu')(x)
    # x = Conv2D(32, kernel_size=(1, 1))(x)
    # x = BatchNormalization()(x)
    # x = Activation('relu')(x)
    # flattened_observation = Flatten()(x)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(64, activation='linear', name='critic_layer_1')(x)
    x = LeakyReLU()(x)
    x = Dense(32, activation='linear', name='critic_layer_2')(x)
    x = LeakyReLU()(x)
    w_init = keras.initializers.RandomUniform(minval=-0.003,
                                              maxval=0.003,
                                              seed=None)
    x = Dense(1,
              activation='linear',
              kernel_initializer=w_init,
              name='critic_layer_3')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    critic_intermediate_1 = Model(
        inputs=critic.inputs,
        outputs=critic.get_layer('critic_layer_1').output)
    critic_intermediate_2 = Model(
        inputs=critic.inputs,
        outputs=critic.get_layer('critic_layer_2').output)
    critic_intermediate_3 = Model(
        inputs=critic.inputs,
        outputs=critic.get_layer('critic_layer_3').output)
    print(critic.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                              theta=.15,
                                              mu=0.,
                                              sigma=.3)
    agent = DDPGAgent(nb_actions=nb_actions,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      nb_steps_warmup_critic=mem_size,
                      nb_steps_warmup_actor=mem_size,
                      random_process=random_process,
                      gamma=.90,
                      target_model_update=1e-3)
    agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
    if load_model:
        weights_filename = 'model_tc_{}.h5f'.format(tc)
        agent.load_weights(weights_filename)
    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.

    n_loop = n_episodes
    output_actor_1 = np.zeros([
        n_loop, actor_intermediate_1.output_shape[1],
        actor_intermediate_1.output_shape[2]
    ])
    output_actor_2 = np.zeros([
        n_loop, actor_intermediate_2.output_shape[1],
        actor_intermediate_2.output_shape[2]
    ])
    output_actor_3 = np.zeros([n_loop, actor_intermediate_3.output_shape[1]])
    output_critic_1 = np.zeros([n_loop, critic_intermediate_1.output_shape[1]])
    output_critic_2 = np.zeros([n_loop, critic_intermediate_2.output_shape[1]])
    output_critic_3 = np.zeros([n_loop, critic_intermediate_3.output_shape[1]])
    if mode == 'train':
        for l in range(n_loop):
            agent_history = agent.fit(env,
                                      nb_steps=steps * train_steps,
                                      visualize=train_visualize,
                                      verbose=1,
                                      nb_max_episode_steps=steps)
            agent.save_weights('model_tc_{}.h5f'.format(tc), overwrite=True)

            # obs, _, _ = env.src._step()
            recent_obs = agent.recent_observation.reshape(
                1, 1, window_length, nb_actions, 1)
            recent_action = agent.recent_action.reshape(1, nb_actions)
            output_actor_1[l] = actor_intermediate_1.predict(recent_obs)[
                0, :, :, 0]
            output_actor_2[l] = actor_intermediate_2.predict(recent_obs)[
                0, :, :, 0]
            output_actor_3[l] = actor_intermediate_3.predict(recent_obs)
            # print("1: {}, 2:{}, 3:{}, 0's:{}, 1's:{}".format(np.mean(output_actor_1.squeeze(), axis=2),
            #                                                  np.mean(output_actor_2.squeeze(), axis=2),
            #                                                  output_actor_3,
            #                                                  np.sum(output_actor_3 == 0),
            #                                                  np.sum(output_actor_3 > 0)))

            output_critic_1[l] = critic_intermediate_1.predict(
                [recent_action, recent_obs])
            output_critic_2[l] = critic_intermediate_2.predict(
                [recent_action, recent_obs])
            output_critic_3[l] = critic_intermediate_3.predict(
                [recent_action, recent_obs])
            print("1: {}, 2:{}, 3:{}".format(output_critic_1[l],
                                             output_critic_2[l],
                                             output_critic_3[l]))

            agent.test(env,
                       nb_episodes=1,
                       visualize=True,
                       nb_max_episode_steps=steps)
        plot_layer_3d(output_actor_1)
        plot_layer_3d(output_actor_2)
        plot_layer_2d(output_actor_3)
        plot_layer_2d(output_critic_1)
        plot_layer_2d(output_critic_2)
        plot_layer_2d(output_critic_3)

    else:
        weights_filename = 'model_tc_{}.h5f'.format(tc)
        agent.load_weights(weights_filename)
        agent.test(env,
                   nb_episodes=1,
                   visualize=True,
                   nb_max_episode_steps=steps)
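
For completeness, a couple of illustrative calls to main2 as defined above; the argument values are examples, not the author's settings.

# Illustrative usage of main2 (values are examples only).
main2(mode='train', tc='plus', load_model=False, train_steps=10, n_episodes=2)   # short training run
main2(mode='test', tc='zero', load_model=True)                                   # evaluate saved weights only
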
Example #19
def main(layers1=[200],
         layers2=[200],
         leaky_alpha=0.10,
         ENV_NAME='EnvPong',
         show=False,
         wall_reward=-0.1,
         touch_reward=0.3,
         n_steps=80000,
         n_alternances=10,
         L_R=0.0001,
         only_test=False,
         opp_aware=[1, 1],
         myopie=[0.00, 0.00],
         ball_speed=1.0,
         weights1_name='',
         weights2_name=''):

    ENV_NAME = ENV_NAME

    conf_name = "{}_layers1={}__layers2={}__leaky={}__lr={}__opp={}__myopia={}__speed={}".format(
        ENV_NAME, layers1, layers2, leaky_alpha, L_R, opp_aware, myopie,
        ball_speed)
    #gym.undo_logger_setup()
    # Get the environment and extract the number of actions.

    if ENV_NAME == 'Env2D':
        env = Game2D(2.)
    elif ENV_NAME == 'Env2DSoloSpin':
        env = Game2DSolo(2., spinRacket=True)
    elif ENV_NAME == 'Env3DSolo':
        env = Game3DSolo(2., 9.8, 0.5, 7., 3.)
    elif ENV_NAME == 'EnvPong':
        env = Pong(PongPlayer(None, opp_aware=(opp_aware[0] == 1)),
                   PongPlayer(None, opp_aware=(opp_aware[1] == 1)))
    np.random.seed(123)
    #env.seed(123)
    assert len(env.action_space.shape) == 1
    nb_actions = env.action_space.shape[0]

    # Next, we build a very simple model.
    actor = Sequential()
    actor.add(Flatten(input_shape=(1, ) + env.observation_space_1.shape))
    #actor.add(keras.layers.normalization.BatchNormalization())
    for size in layers1:
        actor.add(
            Dense(size,
                  kernel_initializer=RandomUniform(minval=-0.005,
                                                   maxval=0.005,
                                                   seed=None)))
        #actor.add(keras.layers.core.Dropout(0.2))
        actor.add(LeakyReLU(leaky_alpha))
    #actor.add(keras.layers.normalization.BatchNormalization())
    actor.add(
        Dense(nb_actions,
              kernel_initializer=RandomUniform(minval=-0.005,
                                               maxval=0.005,
                                               seed=None),
              bias_regularizer=regularizers.l2(0.01)))
    #actor.add(keras.layers.core.Dropout(0.2))
    actor.add(Activation('linear'))
    print(actor.summary())

    action_input = Input(shape=(nb_actions, ), name='action_input')
    observation_input = Input(shape=(1, ) + env.observation_space_1.shape,
                              name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = merge([action_input, flattened_observation], mode='concat')
    #x = keras.layers.normalization.BatchNormalization()(x)
    for size in layers1:
        x = Dense(size)(x)
        #x = keras.layers.core.Dropout(0.2)(x)
        x = LeakyReLU(alpha=leaky_alpha)(x)
    #x = keras.layers.normalization.BatchNormalization()(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(input=[action_input, observation_input], output=x)
    print(critic.summary())

    actor2 = Sequential()
    actor2.add(Flatten(input_shape=(1, ) + env.observation_space_2.shape))
    #actor2.add(keras.layers.normalization.BatchNormalization())
    for size in layers2:
        actor2.add(
            Dense(size,
                  kernel_initializer=RandomUniform(minval=-0.005,
                                                   maxval=0.005,
                                                   seed=None)))
        #actor2.add(keras.layers.core.Dropout(0.2))
        actor2.add(LeakyReLU(alpha=leaky_alpha))
    actor2.add(
        Dense(nb_actions,
              kernel_initializer=RandomUniform(minval=-0.005,
                                               maxval=0.005,
                                               seed=None),
              bias_regularizer=regularizers.l2(0.01)))
    #actor2.add(keras.layers.core.Dropout(0.2))
    actor2.add(Activation('linear'))
    print(actor2.summary())

    action_input2 = Input(shape=(nb_actions, ), name='action_input')
    observation_input2 = Input(shape=(1, ) + env.observation_space_2.shape,
                               name='observation_input')
    flattened_observation2 = Flatten()(observation_input2)
    x2 = merge([action_input2, flattened_observation2], mode='concat')
    #x2 = keras.layers.normalization.BatchNormalization()(x2)
    for size in layers2:
        x2 = Dense(size)(x2)
        #x2 = keras.layers.core.Dropout(0.2)(x2)
        x2 = LeakyReLU(alpha=leaky_alpha)(x2)
    x2 = Dense(1)(x2)
    x2 = Activation('linear')(x2)
    critic2 = Model(input=[action_input2, observation_input2], output=x2)
    print(critic2.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory1 = SequentialMemory(limit=50000, window_length=1)
    if opp_aware[0] != opp_aware[1]:
        memory2 = SequentialMemory(limit=50000, window_length=1)
    else:
        memory2 = memory1
    random_process1 = OrnsteinUhlenbeckProcess(size=nb_actions,
                                               theta=.1,
                                               mu=0.,
                                               sigma=.15,
                                               sigma_min=0.,
                                               n_steps_annealing=n_steps /
                                               4)  # Explores less at the end ?
    random_process2 = OrnsteinUhlenbeckProcess(size=nb_actions,
                                               theta=.1,
                                               mu=0.,
                                               sigma=.15,
                                               sigma_min=0.,
                                               n_steps_annealing=4 * n_steps)
    agent1 = DDPGAgent(nb_actions=nb_actions,
                       actor=actor,
                       critic=critic,
                       critic_action_input=action_input,
                       memory=memory1,
                       nb_steps_warmup_critic=5000,
                       nb_steps_warmup_actor=5000,
                       random_process=random_process1,
                       gamma=.99,
                       target_model_update=1e-3,
                       batch_size=100)
    agent2 = DDPGAgent(nb_actions=nb_actions,
                       actor=actor2,
                       critic=critic2,
                       critic_action_input=action_input2,
                       memory=memory2,
                       nb_steps_warmup_critic=5000,
                       nb_steps_warmup_actor=5000,
                       random_process=random_process2,
                       gamma=.99,
                       target_model_update=1e-3,
                       batch_size=100)

    #agent.compile(Adam(lr=L_R, clipnorm=1., clipvalue=0.5), metrics=['mae'])
    agent1.compile(Adam(lr=L_R, clipnorm=1.), metrics=['mae'])
    agent2.compile(Adam(lr=L_R, clipnorm=1.), metrics=['mae'])

    player1 = PongPlayer(agent1,
                         myopie=myopie[0],
                         opp_aware=(opp_aware[0] == 1))
    player2 = PongPlayer(agent2,
                         myopie=myopie[1],
                         opp_aware=(opp_aware[1] == 1))

    # Grid -4
    # Add -1 when lost
    # CEM method

    directory_log = "logs/ddpg/{}".format(conf_name)
    directory_weights = "weights/ddpg/{}".format(conf_name)

    if not os.path.exists(directory_log):
        os.makedirs(directory_log)
    if not os.path.exists(directory_weights):
        os.makedirs(directory_weights)

    if only_test:
        '''if weights1_name =='':
            weights1_name = "{}/player1_final".format(directory_weights)
        if weights2_name == '':
            weights2_name = "{}/player2_final".format(directory_weights)
        #if os.path.isfile(weights1_name) and os.path.isfile(weights2_name):
        agent1.load_weights(weights1_name)
        agent2.load_weights(weights2_name)'''

        agent1.load_weights("{}/player1_{}".format(directory_weights, "final"))
        agent2.load_weights("{}/player1_{}".format(directory_weights, "final"))

        env = makeEnv(player1, player2, ENV_NAME, ball_speed=ball_speed)
        for i in range(10):
            playPong(env)
        confrontPlayers(env)
        plotStrategy(env)

    else:

        for i in range(n_alternances):

            print "Alternance n {} \n".format(i)

            def learning_rate_schedule(epoch):
                return L_R

            if ENV_NAME == 'Env2D':
                env = Game2D(agent2,
                             wall_reward=wall_reward,
                             touch_reward=touch_reward)
            elif ENV_NAME == 'EnvPong':
                env = Pong(player1,
                           player2,
                           wall_reward=wall_reward,
                           touch_reward=touch_reward,
                           ball_speed=ball_speed)
            agent1.fit(env,
                       nb_steps=n_steps,
                       visualize=False,
                       verbose=1,
                       until_score=True,
                       score_to_reach=0.5,
                       last_episodes=500,
                       nb_max_episode_steps=None,
                       callbacks=[
                           FileLogger("{}/player1_{}.h5f".format(
                               directory_log, i)),
                           keras.callbacks.LearningRateScheduler(
                               learning_rate_schedule)
                       ])
            agent1.test(env,
                        nb_episodes=100,
                        visualize=False,
                        nb_max_episode_steps=500,
                        verbose=1)
            agent1.save_weights("{}/player1_{}".format(directory_weights, i),
                                overwrite=True)
            agent1.memory = SequentialMemory(limit=500000, window_length=1)
            wall_reward = wall_reward * 0.8
            touch_reward = touch_reward * 0.8
            agent2.load_weights("{}/player1_{}".format(directory_weights, i))

        print "Fin de {}".format(conf_name)
        env = Pong(player1,
                   player2,
                   wall_reward=wall_reward,
                   touch_reward=touch_reward,
                   ball_speed=ball_speed)

        #agent1.fit(env, nb_steps=150000, visualize=False, verbose=2, nb_max_episode_steps=None,callbacks=[FileLogger("logs/ddpg/{}_weights_steps_leaky_reg_bias_drop_lr{}.h5f".format(ENV_NAME,L_R), interval=100)])
        agent1.save_weights("{}/player1_final".format(directory_weights),
                            overwrite=True)
        agent2.save_weights("{}/player2_final".format(directory_weights),
                            overwrite=True)

        agent1.test(env,
                    nb_episodes=15,
                    visualize=False,
                    nb_max_episode_steps=500,
                    verbose=2)

    if show:

        if ENV_NAME == 'Env2D':
            for i in range(10):
                play2D(player1=agent1, player2=agent1)
        elif ENV_NAME == 'EnvPong':
            for i in range(10):
                playPong(left=agent1, right=agent2)
Example #20
cfg = tf.ConfigProto(allow_soft_placement=True)
cfg.gpu_options.allow_growth = True

env = gym.make('fooEnv_ID')

n_actions = env.action_space.n  # This depends on the space type: Discrete exposes .n, a Box space would use env.action_space.shape[0]
# reference https://github.com/openai/gym/tree/master/gym/spaces
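# A minimal sketch (assuming a gym-style env) of handling both space types:
#     if isinstance(env.action_space, gym.spaces.Discrete):
#         n_actions = env.action_space.n           # number of discrete actions
#     else:                                        # e.g. gym.spaces.Box
#         n_actions = env.action_space.shape[0]    # dimension of the action vector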

#Architecture, simple feed-forward dense net
inp = Input(((1, ) + env.observation_space.shape))
fl1 = Flatten()(inp)
dn1 = Dense(100, activation='relu')(fl1)
dn2 = Dense(100, activation='relu')(dn1)
otp = Dense(n_actions, activation='linear')(dn2)
DQNModel = Model(inputs=inp, outputs=otp)

# Note: OrnsteinUhlenbeckProcess (see https://github.com/keras-rl/keras-rl/blob/master/rl/random.py)
# supplies exploration noise for continuous-action agents such as DDPG; DQN has
# discrete actions and explores through its policy, so no random process is used here.
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()

agentDQN = DQNAgent(model=DQNModel,
                    nb_actions=n_actions,
                    memory=memory,
                    nb_steps_warmup=10,
                    target_model_update=1e-2,
                    policy=policy)
agentDQN.compile(Adam(lr=1e-3), metrics=['mae'])
agentDQN.fit(env, nb_steps=10000, visualize=False)
Example #21
    def __init__(self, env, rl_lr, rl_memory_span):
        self.real_env = env
        act_space_shape = self.real_env.action_space.shape
        obs_space_shape = (self.real_env.observation_space.shape[0] + 1,)
        assert len(obs_space_shape) == 1

        # Configure the Neural Networks of the RL-agent
        # 1. Actors:
        rl_num_hidden_layer_actor = 3
        rl_num_neurons_per_layer_actor = 16
        rl_actor1 = Sequential()  # Actor1 is a Sequential Neural Network (MLP)
        rl_actor1.add(Flatten(input_shape=(1,) + obs_space_shape))
        for i in range(rl_num_hidden_layer_actor):  # Add the layers to the actor1 NN
            rl_actor1.add(Dense(rl_num_neurons_per_layer_actor, kernel_initializer=RandomUniform(minval=-1, maxval=1)))
            rl_actor1.add(Activation('relu'))
        rl_actor1.add(Dense(act_space_shape[0], kernel_initializer=RandomUniform(minval=-1, maxval=1)))
        rl_actor1.add(Activation('linear'))

        rl_actor2 = Sequential()  # Actor2 is a Sequential Neural Network (MLP)
        rl_actor2.add(Flatten(input_shape=(1,) + obs_space_shape))
        for i in range(rl_num_hidden_layer_actor):  # Add the layers to the actor2 NN
            rl_actor2.add(Dense(rl_num_neurons_per_layer_actor, kernel_initializer=RandomUniform(minval=-1, maxval=1)))
            rl_actor2.add(Activation('relu'))
        rl_actor2.add(
            Dense(act_space_shape[0], kernel_initializer=RandomUniform(minval=-1, maxval=1)))
        rl_actor2.add(Activation('linear'))

        # 2. Critics:
        rl_num_hidden_layer_critic = 3
        rl_num_neurons_per_layer_critic = 32

        action_input1 = Input(shape=act_space_shape, name='action_input')
        observation_input1 = Input(shape=(1,) + obs_space_shape, name='observation_input')
        flattened_observation1 = Flatten()(observation_input1)
        rl_critic_nn1 = Concatenate()([action_input1, flattened_observation1])
        for i in range(rl_num_hidden_layer_critic):
            rl_critic_nn1 = Dense(rl_num_neurons_per_layer_critic,
                                  kernel_initializer=RandomUniform(minval=-1, maxval=1))(rl_critic_nn1)
            rl_critic_nn1 = Activation('relu')(rl_critic_nn1)
        rl_critic_nn1 = Dense(1, kernel_initializer=RandomUniform(minval=-1, maxval=1))(rl_critic_nn1)
        rl_critic_nn1 = Activation('linear')(rl_critic_nn1)
        rl_critic1 = Model(inputs=[action_input1, observation_input1], outputs=rl_critic_nn1)

        action_input2 = Input(shape=act_space_shape, name='action_input')
        observation_input2 = Input(shape=(1,) + obs_space_shape, name='observation_input')
        flattened_observation2 = Flatten()(observation_input2)
        rl_critic_nn2 = Concatenate()([action_input2, flattened_observation2])
        for i in range(rl_num_hidden_layer_critic):
            rl_critic_nn2 = Dense(rl_num_neurons_per_layer_critic,
                                  kernel_initializer=RandomUniform(minval=-1, maxval=1))(rl_critic_nn2)
            rl_critic_nn2 = Activation('relu')(rl_critic_nn2)
        rl_critic_nn2 = Dense(1, kernel_initializer=RandomUniform(minval=-1, maxval=1))(rl_critic_nn2)
        rl_critic_nn2 = Activation('linear')(rl_critic_nn2)
        rl_critic2 = Model(inputs=[action_input2, observation_input2], outputs=rl_critic_nn2)

        # 3. Set training parameters for the Agent and compile it
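        # Replay buffer sized to hold roughly rl_memory_span seconds of
        # experience at the environment's sampling rate of 1 / dt steps per second.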
        rl_mem_size = int(rl_memory_span * round(1 / self.real_env.dt))
        rl_memory1 = SequentialMemory(limit=rl_mem_size, window_length=1)
        rl_memory2 = SequentialMemory(limit=rl_mem_size, window_length=1)
        random_process1 = OrnsteinUhlenbeckProcess(size=act_space_shape[0], theta=.15, mu=0., sigma=.3)
        random_process2 = OrnsteinUhlenbeckProcess(size=act_space_shape[0], theta=.15, mu=0., sigma=.3)

        self.coop_agent = CoopActionOtherDDPG(nb_actions=act_space_shape[0], actor1=rl_actor1, actor2=rl_actor2, critic1=rl_critic1,
                                   critic2=rl_critic2, critic_action_input1=action_input1,
                                   critic_action_input2=action_input2, memory1=rl_memory1, memory2=rl_memory2,
                                   nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, random_process1=random_process1,
                                   random_process2=random_process2, gamma=.99, target_model_update=1e-3)
        self.coop_agent.compile(Adam(lr=rl_lr, clipnorm=1.), metrics=['mae'])
Example #22
def main():
    """Create environment, build models, train."""
    #env = MarketEnv(("ES", "FUT", "GLOBEX", "USD"), obs_xform=xform.Basic(30, 4), episode_steps=STEPS_PER_EPISODE, client_id=3)
    #env = MarketEnv(("EUR", "CASH", "IDEALPRO", "USD"), max_quantity=20000, quantity_increment=20000, obs_xform=xform.Basic(30, 4), episode_steps=STEPS_PER_EPISODE, client_id=5, afterhours=False)
    env = MarketEnv("BTC-USD",
                    max_quantity=10,
                    quantity_increment=1,
                    obs_type='time',
                    obs_size=30,
                    obs_xform=xform.Basic(30, 4),
                    episode_steps=STEPS_PER_EPISODE,
                    client_id=3,
                    loglevel=logging.DEBUG)

    obs_size = np.prod(env.observation_space.shape)

    # Actor model
    dropout = 0.1
    actor = Sequential([
        Flatten(input_shape=(1, ) + env.observation_space.shape),
        BatchNormalization(),
        Dense(obs_size, activation='relu'),
        GaussianDropout(dropout),
        BatchNormalization(),
        Dense(obs_size, activation='relu'),
        GaussianDropout(dropout),
        BatchNormalization(),
        Dense(obs_size, activation='relu'),
        GaussianDropout(dropout),
        BatchNormalization(),
        Dense(1, activation='tanh'),
    ])
    print('Actor model')
    actor.summary()

    action_input = Input(shape=(1, ), name='action_input')
    observation_input = Input(shape=(1, ) + env.observation_space.shape,
                              name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = concatenate([action_input, flattened_observation])
    x = BatchNormalization()(x)
    x = Dense(obs_size + 1, activation='relu')(x)
    x = GaussianDropout(dropout)(x)
    x = Dense(obs_size + 1, activation='relu')(x)
    x = GaussianDropout(dropout)(x)
    x = Dense(obs_size + 1, activation='relu')(x)
    x = GaussianDropout(dropout)(x)
    x = Dense(obs_size + 1, activation='relu')(x)
    x = GaussianDropout(dropout)(x)
    x = Dense(1, activation='linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    print('\nCritic Model')
    critic.summary()

    memory = SequentialMemory(limit=EPISODES * STEPS_PER_EPISODE,
                              window_length=1)
    random_process = OrnsteinUhlenbeckProcess(theta=.5, mu=0., sigma=.5)
    agent = DDPGAgent(
        nb_actions=1,
        actor=actor,
        critic=critic,
        critic_action_input=action_input,
        memory=memory,
        nb_steps_warmup_critic=STEPS_PER_EPISODE * WARMUP_EPISODES,
        nb_steps_warmup_actor=STEPS_PER_EPISODE * WARMUP_EPISODES,
        random_process=random_process,
        gamma=0.95,
        target_model_update=0.01)
    agent.compile('rmsprop', metrics=['mae'])
    weights_filename = 'ddpg_{}_weights.h5f'.format(env.instrument.symbol)
    try:
        agent.load_weights(weights_filename)
        print(
            'Using weights from {}'.format(weights_filename)
        )  # DDPGAgent actually uses two separate files for actor and critic derived from this filename
    except IOError:
        pass
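    # A sketch of the files keras-rl reads/writes here: it inserts '_actor' and
    # '_critic' before the extension, so 'ddpg_SYM_weights.h5f' becomes
    # 'ddpg_SYM_weights_actor.h5f' and 'ddpg_SYM_weights_critic.h5f' (SYM is a placeholder).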
    agent.fit(env,
              nb_steps=EPISODES * STEPS_PER_EPISODE,
              visualize=True,
              verbose=2,
              nb_max_episode_steps=STEPS_PER_EPISODE)
    agent.save_weights(weights_filename, overwrite=True)
Example #23
    def __init__(self,
                 observation_space,
                 action_space,
                 filename='KerasDDPGAgent.h5f'):
        nb_actions = action_space.shape[0]

        # Actor network
        actor = Sequential()
        actor.add(Flatten(input_shape=(1, ) + observation_space.shape))
        actor.add(Dense(256))
        actor.add(Activation('relu'))
        actor.add(Dense(128))
        actor.add(Activation('relu'))
        actor.add(Dense(64))
        actor.add(Activation('relu'))
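        # Output layer: one unit per action, squashed into [0, 1] by the sigmoid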
        actor.add(Dense(nb_actions))
        actor.add(Activation('sigmoid'))
        print(actor.summary())

        # Critic network
        action_input = Input(shape=(nb_actions, ), name='action_input')
        observation_input = Input(shape=(1, ) + observation_space.shape,
                                  name='observation_input')
        flattened_observation = Flatten()(observation_input)

        x = concatenate([action_input, flattened_observation])
        x = Dense(256)(x)
        x = Activation('relu')(x)
        x = Dense(128)(x)
        x = Activation('relu')(x)
        x = Dense(64)(x)
        x = Activation('relu')(x)
        x = Dense(32)(x)
        x = Activation('relu')(x)
        x = Dense(1)(x)
        x = Activation('linear')(x)
        critic = Model(inputs=[action_input, observation_input], outputs=x)
        print(critic.summary())

        # Setup Keras RL's DDPGAgent
        memory = SequentialMemory(limit=100000, window_length=1)
        random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                                  mu=0.,
                                                  sigma=.2,
                                                  size=nb_actions)

        self.agent = DDPGAgent(nb_actions=nb_actions,
                               actor=actor,
                               critic=critic,
                               critic_action_input=action_input,
                               memory=memory,
                               batch_size=128,
                               nb_steps_warmup_critic=128,
                               nb_steps_warmup_actor=128,
                               random_process=random_process,
                               gamma=.75,
                               target_model_update=1e-2,
                               delta_clip=2.)

        self.agent.compile(Adam(lr=.01, clipnorm=2.), metrics=['mae'])

        self.filename = filename
Example #24
    def __init__(self,
                 first_player: bool,
                 stop_ident_time=1e9,
                 do_rl=False,
                 learning_rate=0.01,
                 activation_fcn='relu',
                 learn_time_delta=0.2,
                 rl_time_delta=0.1,
                 epochs=2,
                 fit_batch_size=20,
                 learn_stack=LearningStack(),
                 real_env=CoopPendulum(),
                 rl_memory_span=50,
                 wolf=0.,
                 win_lr_reduction=1,
                 wolf_stop_rl=False):
        """ Sets various parameters, configures the ident, actor and critic NN and compiles the agent"""
        super(PartnerApproximatingLearner, self).__init__(
            first_player)  # Call to __init__ of parent class Controller
        self.learn_stack = learn_stack  # Controller specific LearningStack in which to save the experiences
        self.loosing_lr = learning_rate
        self.rl_lr = .001  # hyper-parameter
        self.win_lr_reduction = win_lr_reduction
        self.wolf = wolf
        self.wolf_stop_rl = wolf_stop_rl
        seed = np.random.randint(0, int(1e6)) + int(
            first_player
        ) * 100  # -> first player gets different seed than second

        # Configure neural network for identification:
        num_hidden_layer_ident = 3
        num_neurons_per_layer_ident = 16
        act_space_shape = real_env.action_space.shape
        obs_space_shape = real_env.observation_space.shape
        ident_nn = Sequential()
        ident_nn.add(
            Dense(num_neurons_per_layer_ident,
                  kernel_initializer=RandomUniform(minval=-1,
                                                   maxval=1,
                                                   seed=seed),
                  input_shape=obs_space_shape))
        for i in range(num_hidden_layer_ident -
                       1):  # Add the layers to the identification NN
            ident_nn.add(
                Dense(num_neurons_per_layer_ident,
                      kernel_initializer=RandomUniform(minval=-1,
                                                       maxval=1,
                                                       seed=seed + i)))
            ident_nn.add(Activation(activation_fcn))
        ident_nn.add(
            Dense(act_space_shape[0],
                  kernel_initializer=RandomUniform(minval=-0.0001,
                                                   maxval=0.0001,
                                                   seed=seed + 9)))
        ident_nn.add(Activation('linear'))
        opt = Adam(lr=learning_rate)  # hyper-parameter
        ident_nn.compile(optimizer=opt, loss='mse')  # hyper-parameter

        # Use the neural network inside a NNController for easy evaluation of the output:
        self.ident_ctrl = StaticNNController(
            first_player=(not self.first_player), neural_net=ident_nn)

        # Set other identification parameters
        self.ident_time_delta = learn_time_delta  # simulation time between training the other_model with experience
        self.last_ident_time = 0  # last time ident NN was trained
        self.epochs = epochs  # number of training epochs when it's time to identify again
        self.fit_batch_size = fit_batch_size  # size of mini batch that the batch is split into for training by Keras
        self.stop_ident_time = stop_ident_time  # Time at which no training should occur anymore. Used for testing
        self.do_rl = do_rl
        if do_rl:
            self.rl_env = deepcopy(real_env)
            self.last_rl_time = -1
            self.rl_time_delta = rl_time_delta
            self.rl_env.set_ctrl_other(self.ident_ctrl)
            try:
                self.u_limit = self.rl_env.action_space_u1 if first_player else self.rl_env.action_space_u2
            except AttributeError:  # rl_env does not have individual limits
                self.u_limit = self.rl_env.action_space

            # Configure the Neural Networks of the RL-agent
            # 1. Actor:
            rl_num_hidden_layer_actor = 3
            rl_num_neurons_per_layer_actor = 16
            rl_actor = Sequential(
            )  # Actor is a Sequential Neural Network (MLP)
            rl_actor.add(Flatten(input_shape=(1, ) + obs_space_shape))
            for i in range(rl_num_hidden_layer_actor
                           ):  # Add the layers to the actor NN
                rl_actor.add(
                    Dense(rl_num_neurons_per_layer_actor,
                          kernel_initializer=RandomUniform(minval=-1,
                                                           maxval=1,
                                                           seed=seed + 10 +
                                                           i)))
                rl_actor.add(Activation(activation_fcn))
            rl_actor.add(
                Dense(act_space_shape[0],
                      kernel_initializer=RandomUniform(minval=-1,
                                                       maxval=1,
                                                       seed=seed + 19)))
            rl_actor.add(Activation('linear'))

            # 2. Critic:
            rl_num_hidden_layer_critic = 3
            rl_num_neurons_per_layer_critic = 32
            action_input = Input(shape=act_space_shape, name='action_input')
            observation_input = Input(shape=(1, ) + obs_space_shape,
                                      name='observation_input')
            flattened_observation = Flatten()(observation_input)
            rl_critic_nn = Concatenate()([action_input, flattened_observation])
            for i in range(rl_num_hidden_layer_critic):
                rl_critic_nn = Dense(rl_num_neurons_per_layer_critic,
                                     kernel_initializer=RandomUniform(
                                         minval=-1,
                                         maxval=1,
                                         seed=seed + 20 + i))(rl_critic_nn)
                rl_critic_nn = Activation(activation_fcn)(rl_critic_nn)
            rl_critic_nn = Dense(
                1,
                kernel_initializer=RandomUniform(minval=-1,
                                                 maxval=1,
                                                 seed=seed + 29))(rl_critic_nn)
            rl_critic_nn = Activation('linear')(rl_critic_nn)
            rl_critic = Model(inputs=[action_input, observation_input],
                              outputs=rl_critic_nn)

            # 3. Set training parameters for the Agent and compile it
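            # Replay buffer sized to span roughly rl_memory_span seconds:
            # (1 / rl_time_delta) RL calls per simulated second, each running
            # rl_frames_per_train internal steps.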
            rl_frames_per_train = 200
            rl_mem_size = int(
                rl_memory_span *
                (round(1 / self.rl_time_delta) * rl_frames_per_train))
            rl_memory = SequentialMemory(limit=rl_mem_size, window_length=1)
            random_process = OrnsteinUhlenbeckProcess(size=act_space_shape[0],
                                                      theta=.15,
                                                      mu=0.,
                                                      sigma=.3)
            self.rl_agent = DDPGAgent(nb_actions=act_space_shape[0],
                                      actor=rl_actor,
                                      critic=rl_critic,
                                      critic_action_input=action_input,
                                      memory=rl_memory,
                                      nb_steps_warmup_critic=100,
                                      nb_steps_warmup_actor=100,
                                      random_process=random_process,
                                      gamma=.99,
                                      target_model_update=1e-3)
            self.rl_agent.compile(Adam(lr=self.rl_lr, clipnorm=1.),
                                  metrics=['mae'])
            self.rl_actor_ctrl = StaticNNController(
                first_player=self.first_player, neural_net=rl_actor)
Example #25
log_filename_pre = '../results/Swimmer3/'
process_noise_std = 0.00001 * 20
theta = 0.15

GAMMA = 1.0  # GAMMA of our cumulative reward function
STEPS_PER_EPISODE = 1600  # No. of time-steps per episode

# configure and compile our agent by using built-in Keras optimizers and the metrics!
# allocate the memory by specifying the maximum no. of samples to store
memory = SequentialMemory(limit=800000, window_length=1)
# random process for exploration noise
#random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, dt=0.01, mu=0., sigma=.2)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                          theta=theta,
                                          dt=0.01,
                                          mu=0.,
                                          sigma=.35,
                                          sigma_min=0.05,
                                          n_steps_annealing=1500000)
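# The exploration noise anneals linearly from sigma=0.35 down to sigma_min=0.05
# over the first 1.5M steps, so exploration shrinks as training progresses.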

# define the DDPG agent
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=100,
                  nb_steps_warmup_actor=100,
                  random_process=random_process,
                  gamma=GAMMA,
                  target_model_update=5e-4)
Example #26
    x = Dense(32, activation='relu')(x)
    x = Dense(32, activation='relu')(x)
    x = Dense(32, activation='relu')(x)
    x = Dense(1, activation='linear')(x)
    critic = Model(inputs=(action_input, observation_input), outputs=x)
    print(critic.summary())

    # Define a memory buffer for the agent, allowing it to learn from past experiences
    memory = SequentialMemory(limit=5000, window_length=window_length)

    # Create a random process for exploration during training
    # this is essential for the DDPG algorithm
    random_process = OrnsteinUhlenbeckProcess(theta=0.5,
                                              mu=0.0,
                                              sigma=0.1,
                                              dt=env.physical_system.tau,
                                              sigma_min=0.05,
                                              n_steps_annealing=85000,
                                              size=2)

    # Create the agent for DDPG learning
    agent = DDPGAgent(
        # Pass the previously defined characteristics
        nb_actions=nb_actions,
        actor=actor,
        critic=critic,
        critic_action_input=action_input,
        memory=memory,
        random_process=random_process,

        # Define the overall training parameters
Example #27
        except:
            print("...failed.")
            memory = PrioritizedExperience(memory_size=2**14,
                                           alpha=alpha0,
                                           beta=beta0,
                                           window_length=window_size)

        try:
            print("Trying to load 'OU process'", end="")
            random_process = pickle.load(open("random_process.pkl", "rb"))
            print("...done.")
        except:
            print("...failed.")
            random_process = OrnsteinUhlenbeckProcess(
                size=nb_actions,
                theta=.15,
                mu=0.,
                sigma=.2,
                n_steps_annealing=nb_steps)
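
        # Skip the warm-up phase when the restored prioritized replay memory
        # already holds more than 1024 transitions; otherwise collect 1024 first.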

        memory_filled = memory.tree.filled_size()
        if memory_filled > 1024:
            warmup_steps = 0
        else:
            warmup_steps = 1024

        agent = DDPG_PERAgent(nb_actions=nb_actions,
                              actor=actor,
                              critic=critic,
                              critic_action_input=models.action_input,
                              memory=memory,
                              nb_steps_warmup_critic=warmup_steps,
Example #28
def controllera(t, joints, links, joint2, joint3, joint4, joint5, rewarda_ros,
                joint1, agent, graph1, session1):

    if agent.value is None:
        # import keras-rl in NRP through virtual env
        import site, os
        site.addsitedir(
            os.path.expanduser(
                '~/.opt/tensorflow_venv/lib/python2.7/site-packages'))
        from keras.models import Model, Sequential
        from keras.layers import Dense, Activation, Flatten, Input, concatenate
        from keras.optimizers import Adam, RMSprop
        from rl.agents import DDPGAgent
        from rl.memory import SequentialMemory
        from rl.random import OrnsteinUhlenbeckProcess
        from keras import backend as K

        from tensorflow import Session, Graph
        K.clear_session()

        obs_shape = (6, )

        nb_actions = 5

        # create the nets for rl agent
        # actor net

        graph1.value = Graph()
        with graph1.value.as_default():
            session1.value = Session()
            with session1.value.as_default():

                actor = Sequential()
                actor.add(Flatten(input_shape=(1, ) + obs_shape))
                actor.add(Dense(32))
                actor.add(Activation('relu'))
                actor.add(Dense(32))
                actor.add(Activation('relu'))
                actor.add(Dense(32))
                actor.add(Activation('relu'))
                actor.add(Dense(nb_actions))
                actor.add(Activation('sigmoid'))
                clientLogger.info('actor net init')

                # critic net
                action_input = Input(shape=(nb_actions, ), name='action_input')
                observation_input = Input(shape=(1, ) + obs_shape,
                                          name='observation_input')
                flattened_observation = Flatten()(observation_input)
                x = concatenate([action_input, flattened_observation])
                x = Dense(64)(x)
                x = Activation('relu')(x)
                x = Dense(64)(x)
                x = Activation('relu')(x)
                x = Dense(64)(x)
                x = Activation('relu')(x)
                x = Dense(1)(x)
                x = Activation('linear')(x)
                critic = Model(inputs=[action_input, observation_input],
                               outputs=x)
                clientLogger.info('critic net init')

                # instanstiate rl agent
                memory = SequentialMemory(limit=1000, window_length=1)
                random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                                          mu=0.,
                                                          sigma=.2,
                                                          size=nb_actions)
                agent.value = DDPGAgent(nb_actions=nb_actions,
                                        actor=actor,
                                        critic=critic,
                                        critic_action_input=action_input,
                                        memory=memory,
                                        nb_steps_warmup_critic=10,
                                        nb_steps_warmup_actor=10,
                                        random_process=random_process,
                                        gamma=.99,
                                        batch_size=5,
                                        target_model_update=1e-3,
                                        delta_clip=1.)
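                # The agent is stepped manually via forward()/backward() below
                # rather than fit(), so the training flag is set explicitly.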
                agent.value.training = True
                clientLogger.info('rl agent init')

                PATH = '/home/user/WORK/NRP/NRP-local/Experiments/bf_manipulation_demo/ddpg_weights.h5'
                if os.path.isfile(PATH):
                    print('loading weights')
                    agent.value.load_weights(PATH)
                    clientLogger.info('weights loaded')

                agent.value.compile(Adam(lr=.001, clipnorm=1.),
                                    metrics=['mae'])
                clientLogger.info('agent compiled - ready to use')

#### run steps

#graph1.value = Graph()
    with graph1.value.as_default():
        #       session1.value = Session()
        with session1.value.as_default():

            import math
            import numpy as np

            angle_lower = links.value.pose[5].position.x
            angle_vel_lower = links.value.pose[7].position.x
            angle_upper = links.value.pose[9].position.x
            angle_vel_upper = links.value.pose[12].position.x
            #  clientLogger.info('humerus_angle ', links.value.pose[15].position.y)
            #  clientLogger.info('humerus_ang_vel ', angle_vel_lower)
            #  clientLogger.info('radius_angle ', angle_upper)
            #  clientLogger.info('radius_ang_vel ', angle_vel_lower)

            observation = np.array([
                math.cos(angle_lower),
                math.sin(angle_lower), angle_vel_lower,
                math.cos(angle_upper),
                math.sin(angle_upper), angle_vel_upper
            ])

            # get movement action from agent and publish to robot
            action = agent.value.forward(observation)
            clientLogger.info('agent stepped forward')

            # move robot
            joint1.send_message(std_msgs.msg.Float64(action[0]))
            joint2.send_message(std_msgs.msg.Float64(-action[1]))
            joint3.send_message(std_msgs.msg.Float64(action[2]))
            joint4.send_message(std_msgs.msg.Float64(action[3]))
            joint5.send_message(std_msgs.msg.Float64(action[4]))

            # Euclidean distance between link 57 and link 4. The original code
            # subtracted the x components in all three terms, which looks like a
            # copy-paste typo; x, y and z are used here instead.
            reward = math.sqrt(
                math.pow(links.value.pose[57].position.x - links.value.pose[4].position.x, 2) +
                math.pow(links.value.pose[57].position.y - links.value.pose[4].position.y, 2) +
                math.pow(links.value.pose[57].position.z - links.value.pose[4].position.z, 2))
            clientLogger.info('REWARD IS:', reward)
            rewarda_ros.send_message(reward)
            # reward x would have to be minimized to go down!
            # -(angle_lower**2 + 0.1*angle_vel_lower**2 +
            #     angle_upper**2 + 0.1*angle_vel_upper**2 +
            #     0.001*np.sum(np.power(action, 2)))

            #learn from the reward
            agent.value.backward(reward)
            clientLogger.info('agent stepped backward')
            agent.value.step = agent.value.step + 1

            if agent.value.step % 20 == 0:
                clientLogger.info('saving weights')
                PATH = '/home/user/Desktop/keras_learning_weights/ddpg_weights_a.h5'
                agent.value.save_weights(PATH, overwrite=True)

            clientLogger.info('-------one step done')
Example #29
flattened_observation = Flatten()(observation_input)
x = Dense(400)(flattened_observation)
x = Activation('relu')(x)
x = Concatenate()([x, action_input])
x = Dense(300)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                          theta=.15,
                                          mu=0.,
                                          sigma=.1)
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=1000,
                  nb_steps_warmup_actor=1000,
                  random_process=random_process,
                  gamma=.99,
                  target_model_update=1e-3,
                  processor=MujocoProcessor())
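# keras-rl unpacks a two-element optimizer list as [actor_optimizer, critic_optimizer],
# so the actor trains with lr=1e-4 and the critic with lr=1e-3 here.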
agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
Example #30
x = Dense(1)(x)
x = Activation('linear')(x)

critic = Model(inputs=[action_input, observation_input], outputs=x)

opti_critic = Adam(lr=LR_CRITIC)

# #### SET UP THE AGENT #####
# Initialize Replay Buffer ##
memory = SequentialMemory(limit=REPLAY_BUFFER_SIZE, window_length=1)
# window_length: number of consecutive observations stacked into each input
# (useful for Atari-style frames, e.g. to capture the ball's velocity)
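# A minimal sketch (hypothetical value) of frame stacking for pixel inputs:
#     memory = SequentialMemory(limit=REPLAY_BUFFER_SIZE, window_length=4)
# With window_length=4 the agent sees the last 4 observations at each step,
# so the network input shape becomes (4,) + observation_shape.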

# Random process (exploration) ##
random_process = OrnsteinUhlenbeckProcess(theta=THETA,
                                          mu=0,
                                          sigma=SIGMA,
                                          size=action_size)

# DDPG agent parameters ##
agent = DDPGAgent(nb_actions=action_size,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  random_process=random_process,
                  gamma=DISC_FACT,
                  target_model_update=TARGET_MODEL_UPDATE,
                  batch_size=BATCH_SIZE)

# keras-rl's DDPGAgent.compile expects the optimizer list in [actor, critic] order
agent.compile(optimizer=[opti_actor, opti_critic])