Example #1
import numpy as np
import tensorflow as tf
import tflearn

# LinearSystem, ActorNetwork, CriticNetwork, OrnsteinUhlenbeckActionNoise and
# train are assumed to be defined elsewhere in the same module.
def main(args):

    """
    Environment used in this code is Pendulum-v0 from OpenAI gym.

        States: cos(theta), sin(theta), theta_dt
        Actions: Force application between -2 to 2
        Reward: -(Θ^2 + 0.1*Θ_dt^2 + 0.001*action^2)

    Objective:  Pendulum is vertical, with 0 movement.
    Initialization: Starts at a random angle, and at a random velocity.
    End: After all the steps are exhausted
    """

    with tf.Session() as sess:

        # Create the Gym-style LinearSystem environment
        env = LinearSystem(nsim=args['max_episode_len'], model_type='MIMO', x0=np.array([1.333, 4]),
                           u0=np.array([3, 6]), xs=np.array([3.555, 4.666]), us=np.array([5, 7]), step_size=0.2)

        # Seed every source of randomness for reproducibility
        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))
        env.seed(int(args['random_seed']))
        tflearn.init_graph(seed=int(args['random_seed']))

        # Define all the state and action dimensions, and the bound of the action
        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high

        # Initialize the actor and critic
        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             float(args['actor_lr']), float(args['tau']),
                             int(args['minibatch_size']))

        critic = CriticNetwork(sess, state_dim, action_dim,
                               float(args['critic_lr']), float(args['tau']),
                               float(args['gamma']),
                               actor.get_num_trainable_vars())

        # Create the saver only after the actor and critic have added their
        # variables to the graph; a tf.train.Saver() built earlier would have
        # no variables to save or restore
        saver = tf.train.Saver()

        # Restore old model
        # saver.restore(sess, args['ckpt_dir'])

        # Initialize Ornstein Uhlenbeck Noise
        actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim), dt=env.step_size,
                                                   random_seed=int(args['random_seed']))

        # sigma=np.array([1, 1])

        # Train the Actor-Critic Model
        replay_buffer, action_list = train(sess, env, args, actor, critic, actor_noise)

        # Save the model
        saver.save(sess, args['ckpt_dir'])

        return actor, critic, env, replay_buffer, action_list
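For context, a minimal sketch of how this main() might be invoked. The dictionary keys mirror the ones read inside the function; the concrete values are illustrative assumptions, and train() may read additional keys (for example episode or buffer limits) that are not visible in this snippet.

if __name__ == '__main__':
    # Hypothetical hyper-parameter dict; keys mirror those accessed in main(),
    # values are placeholders chosen for illustration only
    args = {
        'max_episode_len': 200,
        'random_seed': 1234,
        'actor_lr': 1e-4,
        'critic_lr': 1e-3,
        'tau': 1e-3,
        'gamma': 0.99,
        'minibatch_size': 64,
        'ckpt_dir': './checkpoints/ddpg.ckpt',
    }
    actor, critic, env, replay_buffer, action_list = main(args)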
Example #2
import gym
import numpy as np
import tensorflow as tf

# ActorNetwork, CriticNetwork, OrnsteinUhlenbeckActionNoise and train are
# assumed to be defined elsewhere in the same module.
def main(args):
    with tf.Session() as sess:

        env = gym.make(args['env'])
        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))
        env.seed(int(args['random_seed']))

        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high
        # Ensure the action bound is symmetric around zero; np.all makes the
        # element-wise comparison safe for multi-dimensional action spaces
        assert np.all(env.action_space.high == -env.action_space.low)

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             float(args['actor_lr']), float(args['tau']),
                             int(args['minibatch_size']))

        critic = CriticNetwork(sess, state_dim, action_dim,
                               float(args['critic_lr']), float(args['tau']),
                               float(args['gamma']),
                               actor.get_num_trainable_vars())

        actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

        train(sess, env, args, actor, critic, actor_noise)
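Neither example shows the OrnsteinUhlenbeckActionNoise helper itself. The sketch below is a typical Ornstein-Uhlenbeck exploration-noise process consistent with the constructor calls above (mu, dt, random_seed); the sigma and theta defaults and the internal bookkeeping are assumptions, not the original implementation. Calling the object returns one temporally correlated noise sample, which is added to the actor's action during rollouts and clipped to action_bound.

import numpy as np

class OrnsteinUhlenbeckActionNoise:
    """Temporally correlated exploration noise for DDPG (sketch)."""

    def __init__(self, mu, sigma=0.3, theta=0.15, dt=1e-2, x0=None, random_seed=None):
        self.mu = np.asarray(mu, dtype=np.float64)
        self.sigma = sigma          # noise scale (assumed default)
        self.theta = theta          # mean-reversion rate (assumed default)
        self.dt = dt                # integration time step
        self.x0 = x0
        self.rng = np.random.RandomState(random_seed)
        self.reset()

    def __call__(self):
        # Euler discretization of dx = theta*(mu - x)*dt + sigma*sqrt(dt)*dW
        self.x_prev = (self.x_prev
                       + self.theta * (self.mu - self.x_prev) * self.dt
                       + self.sigma * np.sqrt(self.dt)
                       * self.rng.normal(size=self.mu.shape))
        return self.x_prev

    def reset(self):
        # Restart the process from x0 if given, otherwise from the mean mu
        self.x_prev = self.x0 if self.x0 is not None else np.copy(self.mu)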