# Exploration-noise scale shared across calls. policy_output_to_stochastic_action
# decrements it on every call and applies it as max(epsilon, 0), so the greedy
# noise fades out once the value is no longer positive.
epsilon = 1


def policy_output_to_stochastic_action(output, action_space):
    """Add decaying greedy exploration noise to a policy output and clip it.

    Every call lowers the module-level ``epsilon`` by
    ``1.0 / DDPG_CFG.greedy_accel_noise_steps`` and then uses
    ``max(epsilon, 0)`` as the noise scale, so the noise disappears once
    ``epsilon`` is no longer positive.

    Args:
        output: Policy output; axis 0 is squeezed away and entries 0, 1 and 2
            of the result are treated as steer, accel and brake.
        action_space: Object exposing ``low`` and ``high`` clipping bounds.

    Returns:
        The noisy action clipped to ``[action_space.low, action_space.high]``.
    """
    global epsilon
    action = np.squeeze(output, axis=0)

    # Decay first, then clamp: the scale used below is never negative.
    epsilon -= 1.0 / DDPG_CFG.greedy_accel_noise_steps
    noise_scale = max(epsilon, 0)

    # Extra arguments handed to greedy_function after the action component.
    greedy_args = (
        (0.0, 0.60, 0.30),  # steer
        (0.5, 1.00, 0.10),  # accel
        (-0.1, 1.00, 0.05),  # brake
    )
    greedy_noise = np.array([
        noise_scale * greedy_function(action[idx], *args)
        for idx, args in enumerate(greedy_args)
    ])

    return np.clip(greedy_noise + action, action_space.low, action_space.high)


if __name__ == "__main__":
    # Record when the training run was launched.
    started_at = time.ctime()
    tf.logging.info(
        "@@@  start ddpg training gym_torcs @@@ start time:{}".format(
            started_at))

    # TORCS environment: vision off, throttle on, no gear changes, port 3101.
    torcs_options = dict(vision=False,
                         throttle=True,
                         gear_change=False,
                         port=3101)
    env_train = torcs_env_wrapper(**torcs_options)

    train(env_train, agent_action, eval_mode=False)
    # stochastic_action = output + noise_process.sample()
    # bound to torcs scope
    bounded = np.clip(stochastic_action, action_space.low, action_space.high)
    return bounded


if __name__ == "__main__":
    # Record when the training run was launched.
    launch_time = time.ctime()
    tf.logging.info(
        "@@@  start ddpg training gym_torcs @@@ start time:{}".format(
            launch_time))

    # TORCS environment: vision off, throttle on, no gear changes, port 3101.
    # (A separate vision-enabled env on port 8888 was tried and disabled.)
    env_train = torcs_env_wrapper(
        vision=False, throttle=True, gear_change=False, port=3101)

    # TODO: rewrite.
    # Components are ordered steer, accel, brake and applied after the greedy
    # noise; author's note: valid noise values keep the gradients happy.
    # TODO: start with equal exploration on steer, brake, accel.
    # Earlier x0 candidates: [0, 0.5, -0.1], [0.1, 0.3, 0.1], [-0.2, 0.0, 0.2].
    noise_process = UO_Process(
        mu=np.array([0, 0, 0]),
        x0=np.array([-0.2, -0.2, 0.2]),
        theta=np.array([0.15, 0.15, 0.15]),
        sigma=np.array([0.3, 0.3, 0.3]),
        dt=1e-2,
    )

    # The same environment is passed for both environment arguments.
    train(env_train, env_train, agent_action, noise_process)

# NOTE(review): stray module-level call with no arguments. Per the TensorFlow
# API, conv2d_transpose has required parameters, so this presumably raises
# TypeError when executed -- looks like leftover scratch text; confirm and remove.
tf.nn.conv2d_transpose()
# Example #3
0
    return bounded


if __name__ == "__main__":
    # Record when the training run was launched.
    begin = time.ctime()
    tf.logging.info(
        "@@@  start ddpg training gym_bipedal_walker_v2 @@@ start time:{}".
        format(begin))

    # Two independent BipedalWalker-v2 instances: one to train on and one
    # wrapped in a Monitor that writes to the configured eval directory.
    env_id = 'BipedalWalker-v2'
    train_env = gym.make(id=env_id)
    eval_monitor = Monitor(
        gym.make(id=env_id),
        directory=DDPG_CFG.eval_monitor_dir,
        # Always answers False -- presumably turns video recording off.
        video_callable=lambda x: False,
        resume=True,
    )

    # Four-component noise process.
    # TODO: greedy accel in the beginning.
    # Earlier x0 candidates (3-dim): [0, 0.5, -0.1], [0.1, 0.3, 0.1].
    noise_process = UO_Process(
        mu=np.array([0.0, 0.0, 0.0, 0.0]),
        x0=np.array([-0.2, 0.2, 0.2, 0.2]),
        theta=np.array([0.15, 0.15, 0.15, 0.15]),
        sigma=np.array([0.3, 0.3, 0.3, 0.3]),
        dt=1e-2,
    )

    train(train_env, eval_monitor, agent_action, noise_process)
import time
import tensorflow as tf
from gym_torcs_train_low_dim import torcs_env_wrapper
from low_dim_train.train_agent_low_dim import train

# Shared TensorFlow flags object, aliased for brevity.
DDPG_CFG = tf.app.flags.FLAGS  # alias
# Point logging, checkpoints and the eval monitor at the gym_torcs_low_dim
# evaluation directories (presumably overriding flags defined by the imported
# training module -- verify).
DDPG_CFG.log_dir = 'eval/gym_torcs_low_dim/tf_log/'
DDPG_CFG.checkpoint_dir = 'eval/gym_torcs_low_dim/chk_pnt/'
DDPG_CFG.eval_monitor_dir = 'eval/gym_torcs_low_dim/eval_monitor/'


# Emit INFO-level TensorFlow log messages (needed for the start-up log below).
tf.logging.set_verbosity(tf.logging.INFO)


if __name__ == "__main__":
    # Record when the evaluation run was launched.
    eval_started = time.ctime()
    tf.logging.info(
        "@@@  start ddpg evaluation gym_torcs @@@ start time:{}".format(
            eval_started))

    # TORCS environment: vision on, throttle on, no gear changes.
    torcs_options = dict(vision=True, throttle=True, gear_change=False)
    env = torcs_env_wrapper(**torcs_options)

    # Run the shared train() entry point in evaluation mode; None is passed
    # as its second positional argument.
    train(env, None, eval_mode=True)