def launch(args):
    """Create the OpenAI-ROS Fetch environment and train a DDPG agent on it.

    The task/robot environment name is read from the ROS parameter
    ``/fetch/task_and_robot_environment_name``. The environment and the
    ``random``, NumPy and PyTorch generators are all seeded with
    ``args.seed`` plus the MPI rank of the calling process.

    Args:
        args: Run configuration. ``args.seed`` and ``args.cuda`` are read
            here; the whole object is also passed on to ``ddpg_agent``.
    """
    env_name = rospy.get_param('/fetch/task_and_robot_environment_name')

    # Per the original author's note, this call registers our task env with
    # OpenAI; the object it returns is used as the environment below.
    env = StartOpenAI_ROS_Environment(env_name)
    # env = gym.make(args.env_name)

    # Offset the base seed by the MPI rank so that processes with different
    # ranks are seeded differently, then apply it to every generator.
    worker_seed = args.seed + MPI.COMM_WORLD.Get_rank()
    for seed_fn in (env.seed, random.seed, np.random.seed, torch.manual_seed):
        seed_fn(worker_seed)
    if args.cuda:
        torch.cuda.manual_seed(worker_seed)

    # Collect the environment parameters and hand everything to the agent.
    params = get_env_params(env)
    trainer = ddpg_agent(args, env, params)
    trainer.learn()
# Script setup: extend the output paths, start the ROS node and the Fetch
# environment, then build the agent and buffers and pass them to train_HER.
now = datetime.now()

# Both output locations get the environment name appended.
_env_suffix = "{}".format(params.env_name)
params.log_dir += _env_suffix
params.model_dir += _env_suffix

rospy.init_node("start_her")

# Per the original author's note, StartOpenAI_ROS_Environment registers our
# task env with OpenAI; the object it returns is used as `env` below.
task_and_robot_environment_name = rospy.get_param(
    '/fetch/task_and_robot_environment_name')
env = StartOpenAI_ROS_Environment(task_and_robot_environment_name)

# TODO: this is a temporary solution -- check OpenAI's Fetch implementation.
# The values below are hard-coded in place of the action-space lookups:
# params.max_action = env.action_space.high[0]
# params.num_action = env.action_space.shape[0]
params.max_action, params.num_action = 0, 4

# Seed the environment and TensorFlow with the same configured seed.
env.seed(params.seed)
tf.random.set_random_seed(params.seed)

# Build the agent, the two buffers and the summary writer, then train.
agent = DDPG(Actor, Critic, params.num_action, params)
replay_buffer = ReplayBuffer(params.memory_size)
reward_buffer = deque(maxlen=params.num_episodes)
summary_writer = tf.contrib.summary.create_file_writer(params.log_dir)
train_HER(agent, env, replay_buffer, reward_buffer, summary_writer)
nsteps = rospy.get_param("/turtlebot2/nsteps") running_step = rospy.get_param("/turtlebot2/running_step") """ # Hyperparameters gamma = 0.79 # initially 0.99 discount factor seed = 543 # random seed n_epochs = 20 # number of epochs to test the trained model test_global_step = 0 # Global number of testing steps for tracking cummulative rewards in Tensorboard # If gpu is to be used device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Fix random seed (for reproducibility) env.seed(seed) torch.manual_seed(seed) torch.cuda.manual_seed(seed) # Get number of actions from gym action space #n_inputs = env.observation_space.shape[0] n_inputs = 5 n_actions = env.action_space.n policy_net = DQN(n_inputs, n_actions).to(device) policy_net.load_state_dict(torch.load(MODEL_PATH, map_location=device)) policy_net.eval() #################################################################################################################### #logdir = os.path.join("$HOME/python3_ws/src/turtle2_openai_ros_example/src/logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S")) basedir = os.path.dirname(__file__)