Example #1
import random

import numpy as np
import rospy
import torch
from mpi4py import MPI
from openai_ros.openai_ros_common import StartOpenAI_ROS_Environment
# get_env_params and ddpg_agent are project-local modules, imported elsewhere.


def launch(args):
    # Create the DDPG agent.
    task_and_robot_environment_name = rospy.get_param(
        '/fetch/task_and_robot_environment_name')
    # Register our task env with OpenAI Gym and build the environment;
    # we don't need to inspect its return value any further for now.
    env = StartOpenAI_ROS_Environment(task_and_robot_environment_name)
    # env = gym.make(args.env_name)

    # Set random seeds for reproducibility, offset by the MPI rank so each worker seeds differently.
    env.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    np.random.seed(args.seed + MPI.COMM_WORLD.Get_rank())
    torch.manual_seed(args.seed + MPI.COMM_WORLD.Get_rank())
    if args.cuda:
        torch.cuda.manual_seed(args.seed + MPI.COMM_WORLD.Get_rank())
    # get the environment parameters
    env_params = get_env_params(env)
    # create the ddpg agent to interact with the environment
    ddpg_trainer = ddpg_agent(args, env, env_params)
    ddpg_trainer.learn()
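
The helper get_env_params is not shown above; a minimal sketch of what it might return for a goal-based Fetch environment (the dictionary keys and the probing logic are assumptions based on common DDPG+HER setups, not the project's actual code):

def get_env_params(env):
    # Probe one observation to read the dimensions (a sketch; the real
    # helper in the source project may differ).
    obs = env.reset()
    return {
        'obs': obs['observation'].shape[0],       # state dimension
        'goal': obs['desired_goal'].shape[0],     # goal dimension
        'action': env.action_space.shape[0],      # action dimension
        'action_max': env.action_space.high[0],   # symmetric action bound
        'max_timesteps': env._max_episode_steps,  # episode horizon
    }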
Example #2
from collections import deque
from datetime import datetime

import rospy
import tensorflow as tf
from openai_ros.openai_ros_common import StartOpenAI_ROS_Environment
# DDPG, Actor, Critic, ReplayBuffer, train_HER, and params are
# project-local modules/objects, imported elsewhere.

now = datetime.now()

params.log_dir += "{}".format(params.env_name)
params.model_dir += "{}".format(params.env_name)

rospy.init_node("start_her")
task_and_robot_environment_name = rospy.get_param(
    '/fetch/task_and_robot_environment_name')
# Register our task env with OpenAI Gym and build the environment;
# we don't need to inspect its return value any further for now.
env = StartOpenAI_ROS_Environment(task_and_robot_environment_name)

# params.max_action = env.action_space.high[0]
# params.num_action = env.action_space.shape[0]

# TODO: this is a temporary solution; check OpenAI's Fetch implementation!
params.max_action = 0
params.num_action = 4

# set seed
env.seed(params.seed)
tf.random.set_random_seed(params.seed)

agent = DDPG(Actor, Critic, params.num_action, params)

replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.num_episodes)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)
train_HER(agent, env, replay_buffer, reward_buffer, summary_writer)
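
The function train_HER is not shown; a minimal sketch of the goal-relabeling step at the heart of HER (the "final" strategy), with the transition layout and a simplified two-argument compute_reward assumed for illustration rather than taken from the project:

def her_relabel_final(episode, compute_reward):
    # episode: list of (obs, action, reward, next_obs, desired_goal,
    # achieved_goal) tuples; compute_reward(achieved, desired) is a
    # simplified version of gym's GoalEnv.compute_reward. Both layouts
    # are assumptions for this sketch.
    final_goal = episode[-1][5]  # goal actually achieved at the last step
    relabeled = []
    for obs, action, _, next_obs, _, achieved in episode:
        # Pretend the goal we ended up reaching was the goal all along.
        reward = compute_reward(achieved, final_goal)
        relabeled.append((obs, action, reward, next_obs, final_goal, achieved))
    return relabeled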
Example #3
    """
    nsteps = rospy.get_param("/turtlebot2/nsteps")
    running_step = rospy.get_param("/turtlebot2/running_step")
    """

    # Hyperparameters
    gamma = 0.79  # discount factor (initially 0.99)
    seed = 543  # random seed
    n_epochs = 20  # number of epochs to test the trained model

    test_global_step = 0  # global number of test steps, for tracking cumulative rewards in TensorBoard

    # Use the GPU if one is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Fix random seed (for reproducibility)
    env.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # Get the number of actions from the gym action space
    #n_inputs = env.observation_space.shape[0]
    n_inputs = 5  # hard-coded observation size instead of reading it from the env
    n_actions = env.action_space.n

    policy_net = DQN(n_inputs, n_actions).to(device)
    policy_net.load_state_dict(torch.load(MODEL_PATH, map_location=device))
    policy_net.eval()

    ####################################################################################################################
    #logdir = os.path.join("$HOME/python3_ws/src/turtle2_openai_ros_example/src/logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    basedir = os.path.dirname(__file__)
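
The snippet is cut off here; a minimal sketch of how the loaded policy_net would typically be used to run the n_epochs greedy test episodes (the loop structure is assumed, not the project's actual code):

    for epoch in range(n_epochs):
        state = env.reset()
        done = False
        episode_reward = 0.0
        while not done:
            # Greedy action from the trained Q-network; no exploration at test time.
            with torch.no_grad():
                state_t = torch.tensor(state, dtype=torch.float32,
                                       device=device).unsqueeze(0)
                action = policy_net(state_t).argmax(dim=1).item()
            state, reward, done, _ = env.step(action)
            episode_reward += reward
            test_global_step += 1
        print("epoch {}: total reward {:.2f}".format(epoch, episode_reward))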