import logging
import time

import numpy as np

from mover import Mover

if __name__ == "__main__":
    # Log to a file with level, thread, source location, and timestamp.
    logger = logging.getLogger("learner")
    logger.setLevel(logging.INFO)
    fh = logging.FileHandler("logfile.txt")
    formatter = logging.Formatter(
        "%(levelname)s:%(thread)d:%(filename)s:%(funcName)s:%(asctime)s::%(message)s"
    )
    fh.setFormatter(formatter)
    logger.addHandler(fh)

    # Connect to the remote API server on the local machine.
    mov = Mover("127.0.0.1", 10000)
    mov.init_client()
    print('ori' + str(mov.get_orientation()))
    print('pos' + str(mov.get_position()))
    mov.stop_simulation()

    # Smoke test: restart the simulation ten times, teleporting the robot
    # to a random pose each time and reading its state back.
    for i in range(10):
        rand_pos = np.random.uniform(-0.2, +0.2, (2,))
        time.sleep(0.5)
        mov.start_simulation()
        mov.set_position_and_rotation(rand_pos, np.random.uniform(-np.pi, +np.pi))
        # mov.set_target_vel(np.zeros(2))
        print('ori' + str(mov.get_orientation()))
        print('pos' + str(mov.get_position()))
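# MoverTurnEnv below repeatedly wraps angles back into (-pi, pi] after adding
# or subtracting headings. A standalone sketch of that step (the class inlines
# it rather than calling a helper like this):
def wrap_angle(theta):
    """Wrap an angle in radians into (-pi, pi]; assumes theta is in (-3*pi, 3*pi)."""
    if theta > np.pi:
        theta -= 2 * np.pi
    elif theta < -np.pi:
        theta += 2 * np.pi
    return theta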
class MoverTurnEnv(object):
    def __init__(self, args):
        self.delta_theta = None
        # Optionally launch a headless V-REP instance serving the remote API:
        # subprocess.Popen(
        #     [args['vrep_exec_path'], '-h',
        #      '-gREMOTEAPISERVERSERVICE_' + str(args['server_port']) + '_FALSE_TRUE',
        #      args['vrep_scene_file']])
        self.mover = Mover(args['server_ip'], args['server_port'])
        self.mover.init_client()
        self.per_step_reward = args['per_step_reward']
        self.final_reward = args['final_reward']
        self.tolerance = args['tolerance']
        self.server_ip, self.server_port = args['server_ip'], args['server_port']
        self.spawn_radius = args['spawn_radius']
        self.begin_pos = None
        self.goal = 0

    def set_goal(self, delta_theta):
        self.delta_theta = delta_theta

    def reset_goal(self, delta_theta):
        # Re-derive the absolute goal heading from the current orientation.
        self.delta_theta = delta_theta
        goal = self.mover.get_orientation()[0, -1].item() + self.delta_theta
        if goal > np.pi:
            goal -= 2 * np.pi
        elif goal < -np.pi:
            goal += 2 * np.pi
        self.goal = goal

    def start(self):
        # logging.getLogger("learner").info("ENV::START:goal:%f" % self.delta_theta)
        state = np.hstack((self.mover.get_joint_pos(), self.mover.get_joint_vel()))
        begin_angle = self.mover.start_orientation
        self.begin_pos = self.mover.start_pos
        # Absolute goal heading, wrapped into (-pi, pi].
        self.goal = begin_angle + self.delta_theta
        if self.goal > np.pi:
            self.goal -= 2 * np.pi
        elif self.goal < -np.pi:
            self.goal += 2 * np.pi
        return state, np.asarray(self.delta_theta).reshape((1, -1))

    def _is_terminal(self, current):
        # Angular distance to the goal; if the raw difference exceeds pi,
        # the true distance is its complement on the circle.
        diff = np.absolute(current - self.goal)
        if diff > np.pi:
            diff = 2 * np.pi - diff
        return diff < self.tolerance

    def get_state(self):
        return np.hstack((self.mover.get_joint_pos(), self.mover.get_joint_vel()))

    def step(self, action):
        # logging.getLogger("learner").info("stepping")
        self.mover.set_target_vel(action)
        new_state = np.hstack((self.mover.get_joint_pos(), self.mover.get_joint_vel()))
        new_orient = self.mover.get_orientation()
        # Remaining rotation to the goal, wrapped into (-pi, pi].
        new_goal = self.goal - new_orient[0, -1].item()
        if new_goal > np.pi:
            new_goal -= 2 * np.pi
        elif new_goal < -np.pi:
            new_goal += 2 * np.pi
        new_goal = np.asarray(new_goal).reshape((1, -1))

        pos = self.mover.get_position()
        # If out of bounds, end the episode with a negative reward.
        if abs(pos[0, 0]) > 12 or abs(pos[0, 1]) > 12:
            return new_state, new_goal, -10, True

        # logging.getLogger("learner").info("goal:%0.4f, cur:%0.4f" % (self.goal, new_orient[0, -1].item()))
        if abs(new_goal) < self.tolerance:
            # Goal reached: penalize (x, y) drift from the spawn position and
            # any residual joint velocity (velocities start at index 23 of the
            # state vector for this robot).
            disp_reward = -np.linalg.norm(self.mover.get_position()[0, :2] - self.begin_pos)
            vel_reward = -np.linalg.norm(new_state[0, 23:])
            return new_state, new_goal, (vel_reward + disp_reward + self.final_reward), True
        else:
            return new_state, new_goal, self.per_step_reward, False

    def reset(self):
        logging.getLogger("learner").info("ENV::RESET")
        self.mover.set_target_vel(np.zeros(2))
        self.mover.stop_simulation()
        time.sleep(0.5)
        self.mover.start_simulation()
        # Respawn at a random point within the spawn radius, with a random heading.
        spawn_rad = np.random.uniform(-self.spawn_radius, +self.spawn_radius)
        angle = np.random.uniform(-np.pi, np.pi)
        self.mover.set_position_and_rotation(
            spawn_rad * np.asarray((np.cos(angle), np.sin(angle))),
            np.random.uniform(-np.pi, +np.pi))
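# A minimal sketch of running one episode with a random policy, illustrating
# the expected call order (set_goal -> reset -> start -> step). The action
# range, episode cap, and 2-D action shape (matching set_target_vel(np.zeros(2))
# in reset) are assumptions, not values taken from this module.
def run_random_episode(env, delta_theta, max_steps=200):
    env.set_goal(delta_theta)   # desired rotation for this episode
    env.reset()                 # respawn at a random pose
    state, goal = env.start()   # record the spawn pose, compute the absolute goal
    reward, done = 0.0, False
    for _ in range(max_steps):
        action = np.random.uniform(-1.0, 1.0, (2,))  # random wheel-velocity command
        state, goal, reward, done = env.step(action)
        if done:
            break
    return reward

# e.g. run_random_episode(MoverTurnEnv(args), np.pi / 2) asks for a quarter
# turn to the left, with args filled in as described in __init__.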