# Initialize the Webots Supervisor. supervisor = Supervisor() timeStep = int(4 * supervisor.getBasicTimeStep()) # Create the arm chain from the URDF filename = None with tempfile.NamedTemporaryFile(suffix='.urdf', delete=False) as file: filename = file.name file.write(supervisor.getUrdf().encode('utf-8')) armChain = Chain.from_urdf_file(filename) # Initialize the arm motors. motors = [] for link in armChain.links: if 'motor' in link.name: motor = supervisor.getMotor(link.name) motor.setVelocity(1.0) motors.append(motor) # Get the arm and target nodes. target = supervisor.getFromDef('TARGET') arm = supervisor.getSelf() # Loop 1: Draw a circle on the paper sheet. print('Draw a circle on the paper sheet...') while supervisor.step(timeStep) != -1: t = supervisor.getTime() # Use the circle equation relatively to the arm base as an input of the IK algorithm. x = 0.25 * math.cos(t) + 1.1 y = 0.25 * math.sin(t) - 0.95
class RobotManager: def __init__(self, params): self.robot = Supervisor() self.robot_sup = self.robot.getFromDef("e-puck") self.robot_trans = self.robot_sup.getField("translation") self.robot_rotation = self.robot_sup.getField("rotation") self.compass = self.robot.getCompass("compass") self.motorLeft = self.robot.getMotor("left wheel motor") self.motorRight = self.robot.getMotor("right wheel motor") self.positionLeft = self.robot.getPositionSensor("left wheel sensor") self.positionRight = self.robot.getPositionSensor("right wheel sensor") self.params = params # real robot state self.x = [] self.y = [] self.theta = [] self.distance_sensors_info = [] # odometry estimation self.x_odometry = [] self.y_odometry = [] self.theta_odometry = [] self.sensorNames = [ 'ds0', 'ds1', 'ds2', 'ds3', 'ds4', 'ds5', 'ds6', 'ds7' ] # predicted data self.x_pred = [] self.y_pred = [] self.theta_pred = [] self.data_collector = DataCollector() self.movement_controller = MovementController() self.window_communicator = WindowCommunicator(self.robot) # predictors self.predictor = PredictorNNSensorsNotNormalized() self.predictorCoord = PredictorNNCoordinates() self.timestep = int(self.robot.getBasicTimeStep()) # particles filter initialization robot_initial_conf = RobotConfiguration( self.params.INIT_X, self.params.INIT_Y, self.convert_angle_to_xy_coordinates(self.params.INIT_ANGLE)) environment_conf = EnvironmentConfiguration(self.params.MAX_X, self.params.MAX_Y) self.particles_filter = ParticlesFilter(environment_conf, robot_initial_conf, self.predictor, self.params) self.movement_random = True pass def init_actuators(self): self.compass.enable(self.timestep) self.motorLeft.setPosition(float('inf')) self.motorRight.setPosition(float('inf')) self.positionRight.enable(self.timestep) self.positionLeft.enable(self.timestep) def robot_to_xy(self, x, y): return x + 1, y + 0.75 def xy_to_robot(self, x, y): return x - 1.00000, y - 0.750000 def init_robot_pos(self, x, y): x, y = self.xy_to_robot(x, y) self.robot_trans.setSFVec3f([y, 0, x]) self.robot_rotation.setSFRotation([0, 1, 0, 0]) self.robot_sup.resetPhysics() def get_bearing_degrees(self): north = self.compass.getValues() rad = np.arctan2(north[0], north[2]) bearing = (rad) / np.pi * 180 if bearing < 0.0: bearing += 360 bearing = 360 - bearing - 90 if bearing < 0.0: bearing += 360 return bearing def save_supervisor_coordinates(self): # true robot position information trans_info = self.robot_trans.getSFVec3f() # print('SUP COORD:', trans_info) x_coordinate, y_coordinate = self.robot_to_xy(trans_info[2], trans_info[0]) self.x.append(x_coordinate) self.y.append(y_coordinate) angle = self.get_bearing_degrees() self.theta.append(angle) def step(self): return self.robot.step(self.timestep) != -1 def save_odometry_coordinates(self, coordinate): # convert robot coordinates into global coordinate system self.x_odometry.append(coordinate.x) self.y_odometry.append(coordinate.y) self.theta_odometry.append( self.convert_angle_to_xy_coordinates(coordinate.theta)) def save_sensor_distances(self, distanceSensors): distances = [] for distanceSensor in distanceSensors: distance = distanceSensor.getValue() #there is no real messure. if distance == 10: distance = None distances.append(distance) self.distance_sensors_info.append(distances) def get_sensor_distance(self): # Read the sensors, like: distanceSensors = [] for sensorName in self.sensorNames: sensor = self.robot.getDistanceSensor(sensorName) sensor.enable(self.timestep) distanceSensors.append(sensor) return distanceSensors def convert_angle_to_xy_coordinates(self, angle): angle = angle * 180 / np.pi if angle < 0.0: angle = 360 + angle return angle def plot(self): # Enter here exit cleanup code. plt.ylim([0, self.params.MAX_Y]) plt.xlim([0, self.params.MAX_X]) plt.xlabel("x") plt.ylabel("y") plt.plot(self.x, self.y, label="real") plt.plot(self.x_odometry, self.y_odometry, label="odometry") plt.plot(self.x_pred, self.y_pred, 's', label="correction", marker='o') plt.title("Robot position estimation") plt.legend() plt.savefig("results/position.eps", format='eps') def move_robot_to_random_position(self): new_x = -(1 / 2 * self.params.MAX_X - 0.1) + np.random.random() * (self.params.MAX_X - 0.2) new_y = -(1 / 2 * self.params.MAX_Y - 0.1) + np.random.random() * (self.params.MAX_Y - 0.2) # old_z = robot_trans.getSFVec3f()[1] new_theta = np.random.random() * 2 * np.pi self.robot_trans.setSFVec3f([new_y, 0, new_x]) self.robot_rotation.setSFRotation([0, 1, 0, new_theta]) self.robot_sup.resetPhysics() def execute(self): self.init_actuators() self.init_robot_pos(self.params.INIT_X, self.params.INIT_Y) self.step() self.init_robot_pos(self.params.INIT_X, self.params.INIT_Y) self.window_communicator.sendInitialParams(self.params) odometry = Odometry( self.params.ENCODER_UNIT * (self.positionLeft.getValue()), self.params.ENCODER_UNIT * (self.positionRight.getValue()), self.params.INIT_X, self.params.INIT_Y, self.params.INIT_ANGLE) count = 0 particles = np.array([[], []]) last_move = 'none' errorPos = [] errorOdo = [] continue_running = True # principal robot step cycle while (continue_running): # if the number of steps is greater than EXPERIMENT_DURATION_STEPS then stop running if count == self.params.EXPERIMENT_DURATION_STEPS: continue_running = False # receive message from robot window message = self.window_communicator.receiveMessage() message = json.loads(message) if message else None if message and message['code'] == 'start_randomness': self.movement_random = True elif message and message['code'] == 'stop_randomness': self.movement_random = False elif message and message['code'] == 'params_modification': self.particles_filter.reset_particles( message["number_particles"], message["sigma_xy"], message["sigma_theta"], RobotConfiguration(self.x_pred[-1], self.y_pred[-1], self.theta_pred[-1])) # get odometry data odometry_info, delta_movement = odometry.track_step( self.params.ENCODER_UNIT * (self.positionLeft.getValue()), self.params.ENCODER_UNIT * (self.positionRight.getValue())) # transoform the angle to xy coordinates delta_movement[2] = self.convert_angle_to_xy_coordinates( delta_movement[2]) # for capturing robot positioning if not self.step() and self.params.CAPTURING_DATA: print('saving data') self.data_collector.collect( self.x, self.y, self.theta, np.array(self.distance_sensors_info)) self.plot() # get sensor distances distanceSensors = self.get_sensor_distance() # collect data: real, odometry, predicted self.save_sensor_distances(distanceSensors) self.save_odometry_coordinates(odometry_info) self.save_supervisor_coordinates() # calculate new velocity left_speed, right_speed = 0, 0 if self.movement_random: if self.params.GO_STRAIGHT_MOVE: left_speed, right_speed = self.movement_controller.calculate_velocity( distanceSensors) else: left_speed, right_speed = self.movement_controller.calculate_velocity_random_move( distanceSensors) last_move = 'none' else: # joystick control if message and message[ 'code'] == "move" and last_move != message['direction']: last_move = message['direction'] if last_move == 'UP': left_speed, right_speed = self.movement_controller.move_straight( ) elif last_move == 'DOWN': left_speed, right_speed = self.movement_controller.move_backwards( ) elif last_move == 'RIGHT': left_speed, right_speed = self.movement_controller.move_right( ) elif last_move == 'LEFT': left_speed, right_speed = self.movement_controller.move_left( ) self.motorLeft.setVelocity(left_speed) self.motorRight.setVelocity(right_speed) # predict position based on particles data if not self.params.CAPTURING_DATA and count % self.params.PRED_STEPS == 0 and count != 0: # get particles particles = self.particles_filter.get_particles( delta_movement, self.distance_sensors_info[-1], count % 2 == 0) # get weights sum weighted_sum = np.sum(particles[3]) # get weighted average from particles data x_prim = np.sum(particles[0] * particles[3]) / weighted_sum y_prim = np.sum(particles[1] * particles[3]) / weighted_sum theta_prim = np.sum(particles[2] * particles[3]) / weighted_sum # # get the position prediction given the sensor measurements # predicted_coord = predictorCoord.predict(distance_sensors_info[-1]) # # print(predicted_coord) # # combine both previous models # x_pred.append((x_prim + float(predicted_coord[0]))/2) # y_pred.append((y_prim + float(predicted_coord[1]))/2) self.x_pred.append(x_prim) self.y_pred.append(y_prim) self.theta_pred.append(theta_prim) # predictor.refit_models(x[-1], y[-1], theta[-1], distance_sensors_info[-1]) # calculate error if self.params.CALCULATE_PRED_ERROR: errorPos.append( np.sqrt((self.x[-1] - self.x_pred[-1])**2 + (self.y[-1] - self.y_pred[-1])**2)) if self.params.CALCULATE_ODO_ERROR: errorOdo.append( np.sqrt((self.x[-1] - self.x_odometry[-1])**2 + (self.y[-1] - self.y_odometry[-1])**2)) # send data to html page self.window_communicator.sendCoordinatesParticles( self.x, self.y, self.x_odometry, self.y_odometry, self.x_pred, self.y_pred, particles.tolist()) # move robot to a random position after a while if self.params.CAPTURING_DATA and count % self.params.MOVING_ROBOT_STEPS == 0: self.move_robot_to_random_position() count += 1 if self.params.CALCULATE_PRED_ERROR: print('Saving prediction SDE') pickle.dump(errorPos, open(self.params.PRED_ERROR_FILE, "wb")) if self.params.CALCULATE_ODO_ERROR: print('Saving odometry SDE') pickle.dump(errorOdo, open(self.params.ODO_ERROR_FILE, "wb"))
# Initialize the Webots Supervisor. supervisor = Supervisor() timeStep = int(4 * supervisor.getBasicTimeStep()) # Create the arm chain from the URDF filename = None with tempfile.NamedTemporaryFile(suffix='.urdf', delete=False) as file: filename = file.name file.write(supervisor.getUrdf().encode('utf-8')) armChain = Chain.from_urdf_file(filename) # Initialize the arm motors. motors = [] for link in armChain.links: if 'sensor' in link.name: motor = supervisor.getMotor(link.name.replace('sensor', 'motor')) motor.setVelocity(1.0) motors.append(motor) # Get the arm and target nodes. target = supervisor.getFromDef('TARGET') arm = supervisor.getSelf() # Loop 1: Draw a circle on the paper sheet. print('Draw a circle on the paper sheet...') while supervisor.step(timeStep) != -1: t = supervisor.getTime() # Use the circle equation relatively to the arm base as an input of the IK algorithm. x = 0.25 * math.cos(t) + 1.1 y = 0.25 * math.sin(t) - 0.95
bounds=[-2.18166, 2.0944], translation_vector=[0.929888, 0, 0], orientation=[0, 0, 0], rotation=[0, 1, 0]) ]) # Initialize the Webots Supervisor. supervisor = Supervisor() timeStep = int(4 * supervisor.getBasicTimeStep()) # Initialize the arm motors. motors = [] for motorName in [ 'A motor', 'B motor', 'C motor', 'D motor', 'E motor', 'F motor' ]: motor = supervisor.getMotor(motorName) motor.setVelocity(1.0) motors.append(motor) # Get the arm and target nodes. target = supervisor.getFromDef('TARGET') arm = supervisor.getSelf() # Loop 1: Draw a circle on the paper sheet. print('Draw a circle on the paper sheet...') while supervisor.step(timeStep) != -1: t = supervisor.getTime() # Use the circle equation relatively to the arm base as an input of the IK algorithm. x = 0.25 * math.cos(t) + 1.1 y = 0.25 * math.sin(t) - 0.95
class InvaderBot(): # Initalize the motors. def setup(self): self.robot = Supervisor() self.motor_left = self.robot.getMotor("motor_left") self.motor_right = self.robot.getMotor("motor_right") self.timestep = int(self.robot.getBasicTimeStep()) # Do one update step. Calls Webots' robot.step(). # Return True while simulation is running. # Return False if simulation is ended def step(self): return (self.robot.step(self.timestep) != -1) # Set the velocity of the motor [-1, 1]. def set_motor(self, motor, velocity): mult = 1 if velocity > 0 else -1 motor.setPosition(mult * float('+inf')) motor.setVelocity(velocity * motor.getMaxVelocity()) # Set the velocity of left and right motors [-1, 1]. def set_motors(self, left, right): self.set_left_motor(left) self.set_right_motor(right) # Set the velocity of left motors [-1, 1]. def set_left_motor(self, velocity): self.set_motor(self.motor_left, velocity) # Set the velocity of right motors [-1, 1]. def set_right_motor(self, velocity): self.set_motor(self.motor_right, velocity) # Returns the current simulation time in seconds def get_time(self): return self.robot.getTime() # Returns the position of the robot in [x, z, angle] format # The coordinate system is as follows (top-down view) # .-------------------->x # |\ (angle) # | \ # | \ # | \ # | \ # | \ # | # V # z # def get_position(self): subject = self.robot.getSelf() position = subject.getPosition() orientation = subject.getOrientation() orientation = math.atan2(orientation[0], orientation[2]) orientation = math.degrees(orientation) return [position[0], position[2], orientation] # Returns the position of the balls in the following format # [ # [ # [green_ball_0_x, green_ball_0_z], # [green_ball_1_x, green_ball_1_z] # ], # # [ # [yellow_ball_0_x, yellow_ball_0_z], # [yellow_ball_1_x, yellow_ball_1_z], # [yellow_ball_2_x, yellow_ball_2_z], # ] # ] def get_balls(self): green = [] green_root = self.robot.getFromDef("_green_balls").getField("children") for idx in reversed(range(green_root.getCount())): try: ball = green_root.getMFNode(idx) pos = ball.getPosition() green.append([pos[0], pos[2]]) except: pass yellow = [] yellow_root = self.robot.getFromDef("_yellow_balls").getField( "children") for idx in reversed(range(yellow_root.getCount())): try: ball = yellow_root.getMFNode(idx) pos = ball.getPosition() yellow.append([pos[0], pos[2]]) except: pass return [green, yellow]
# Initialize devices ps = [] psNames = ['ps0', 'ps1', 'ps2', 'ps3', 'ps4', 'ps5', 'ps6', 'ps7'] for i in range(8): ps.append(robot.getDistanceSensor(psNames[i])) ps[i].enable(timestep) cmpXY1 = robot.createCompass("compassXY_01") cmpXY1.enable(timestep) cmpZ1 = robot.createCompass("compassZ_01") cmpZ1.enable(timestep) # Initialize motors leftMotor = robot.getMotor('left wheel motor') rightMotor = robot.getMotor('right wheel motor') leftMotor.setPosition(float('inf')) rightMotor.setPosition(float('inf')) leftMotor.setVelocity(0.0) leftMotor.setVelocity(0.0) # Configuring parameters t = 0 turning = False dir = 0 vpre = 0 trial = 0 tLatency = 0 found = False
class EndP_env3D(object): """docstring for Robot_arm""" def __init__(self): # ---create robot as supervisor--- self.robot = Supervisor() # ---get simulation timestep--- self.timestep = int(self.robot.getBasicTimeStep()) '''Position Motors''' self.velocity = 1 self.dt = 0.032 ## to be modified : init_position '''2D 2Motor''' # self.rm1_init_position = 0 # self.rm4_init_position = -0.0698 # '''3D 4Motor train''' # self.rm2_init_position = 0 # self.rm3_init_position = 0 # self.rm5_init_position = 0 # self.rm6_init_position = 0 # self.rm7_init_position = 0 self.rm1_init_position = self.robot.getFromDef("Franka_Emika_Panda.joint01.j1p").getField('position').getSFFloat() print("rm1_initpos: {}".format(self.rm1_init_position)) self.rm2_init_position = self.robot.getFromDef("Franka_Emika_Panda.joint01.link1.joint12.j2p").getField('position').getSFFloat() self.rm3_init_position = self.robot.getFromDef("Franka_Emika_Panda.joint01.link1.joint12.link2.joint23.j3p").getField('position').getSFFloat() self.rm4_init_position = self.robot.getFromDef("Franka_Emika_Panda.joint01.link1.joint12.link2.joint23.link3.joint34.j4p").getField('position').getSFFloat() self.rm5_init_position = self.robot.getFromDef("Franka_Emika_Panda.joint01.link1.joint12.link2.joint23.link3.joint34.link4.joint45.j5p").getField('position').getSFFloat() self.rm6_init_position = self.robot.getFromDef("Franka_Emika_Panda.joint01.link1.joint12.link2.joint23.link3.joint34.link4.joint45.link5.joint56.j6p").getField('position').getSFFloat() self.rm7_init_position = self.robot.getFromDef("Franka_Emika_Panda.joint01.link1.joint12.link2.joint23.link3.joint34.link4.joint45.link5.joint56.link6.joint67.j7p").getField('position').getSFFloat() '''Orientation Motor''' ## for franka self.P2 = self.robot.getFromDef("Franka_Emika_Panda.joint01.link1.joint12.link2.M2P") self.P4 = self.robot.getFromDef("Franka_Emika_Panda.joint01.link1.joint12.link2.joint23.link3.joint34.link4.M4P") self.ENDP = self.robot.getFromDef("Franka_Emika_Panda.joint01.link1.joint12.link2.joint23.link3.joint34.link4.joint45.link5.joint56.link6.joint67.link7.joint7H.hand.endEffector.endp") self.origin = np.array(self.P2.getPosition()) # self.elbowP = np.array(self.P4.getPosition()) self.endpointP = np.array(self.ENDP.getPosition()) self.edge = self.endpointP[0]-self.origin[0] print("edge : {}".format(self.edge)) print("NEW") self.beaker = self.robot.getFromDef('Beaker') print("beaker_ori: {}".format(self.beaker.getOrientation()[4])) ## to be modified : modify the random space to fit in our robot's work place ## we should use 3D random goal ## for franka ## use a box space for now #x_range = np.linspace(0.4, 0.7, 2) #x = np.random.choice(x_range) #z_range = np.linspace(-0.25, 0.25, 3) #z = np.random.choice(z_range) x = 0.4 z = 0.1 # y_range = np.linspace(0.0, 0.3, 3) # y = np.random.choice(y_range) y = 0.15 self.goal_position = self.robot.getFromDef('TARGET.tar').getPosition() print("goal : {}".format(self.goal_position)) '''3D goal''' self.goal_np_p = np.array(self.goal_position) d, _ = self._get_elbow_position() print("(elbow_local/self.edge) : {}".format(d)) # self.GOAL = self.robot.getFromDef('goal') # self.GOAL_P = self.GOAL.getField('translation') # self.GOAL_P.setSFVec3f(self.goal_position) ## for franka self.arm_motor_name = [ # 'motor1', 'motor2', 'motor3', 'motor4', 'motor5', 'motor6'] '''3D 4Motor''' self.arm_position = dict(zip(self.arm_motor_name, [ # self.rm1_init_position, self.rm2_init_position, self.rm3_init_position, self.rm4_init_position, self.rm5_init_position, self.rm6_init_position])) # self.rm7_init_position])) self.arm_motors, self.arm_ps = self.create_motors(self.arm_motor_name, self.arm_position) ## to be modified : set the position range self.arm_position_range = np.array([ # [-2.897, 2.897], [-1.763, 1.763], [-2.8973, 2.8973], [-3.072, -0.0698], [-2.8973, 2.8973], [-0.0175, 3.7525]]) # [-2.897, 2.897]]) self.act_mid = dict(zip(self.arm_motor_name, np.mean(self.arm_position_range, axis=1))) self.act_rng = dict(zip(self.arm_motor_name, 0.5*(self.arm_position_range[:, 1] - self.arm_position_range[:, 0]))) self.arm_position_range = dict(zip(self.arm_motor_name, [ # [-2.897, 2.897], [-1.763, 1.763], [-2.8973, 2.8973], [-3.072, -0.0698], [-2.8973, 2.8973], [-0.0175, 3.7525]])) # [-2.897, 2.897]])) '''Orientation Motors''' # ---create camera dict--- self.cameras = {} self.camera_name = ['vr_camera_r', # "camera_center" ] self.cameras = self.create_sensors(self.camera_name, self.robot.getCamera) # ---get image height and width--- self.camera_height = self.cameras["vr_camera_r"].getHeight() self.camera_width = self.cameras["vr_camera_r"].getWidth() # ---set game over for game start--- self.done = False self.lives = 0 # ---action set for motor to rotate clockwise/counterclockwise or stop self.action_num = len(self.arm_motors) print("action_num: {}".format(self.action_num)) self.action_space = spaces.Box(-1, 1, shape=(self.action_num,), dtype=np.float32) # self.action_space = spaces.Box(-0.016, 0.016, shape=(self.action_num, ), dtype=np.float32) self.on_goal = 0 # self.crash = 0 self.accumulated_reward = 0 self.episode_len_reward = 0 self.episode_ori_reward = 0 self.arrival_time = 0 # print(self.ENDP.getOrientation()) self.robot.step(self.timestep) self.start_time = self.getTime() '''constant''' self.NEAR_DISTANCE = 0.05 self.BONUS_REWARD = 10 self.ON_GOAL_FINISH_COUNT = 5 self.MAX_STEP = 100 '''variable''' self.finished_step = 0 self.prev_d = self.goal_np_p - np.array(self.ENDP.getPosition()) self.left_bonus_count = self.ON_GOAL_FINISH_COUNT def reset_eval_to_down(self): self.on_goal = 0 # self.crash = 0 self.accumulated_reward = 0 self.episode_len_reward = 0 self.episode_ori_reward = 0 self.arrival_time = 0 self.finished_step = 0 self.prev_d = self.goal_np_p - np.array(self.ENDP.getPosition()) self.left_bonus_count = self.ON_GOAL_FINISH_COUNT self.start_time = self.getTime() self.done = False self.lives = 0 def _get_robot(self): return self.robot def create_motors(self, names, position): obj = {} ps_obj = {} for name in names: motor = self.robot.getMotor(name) motor.setVelocity(self.velocity) motor.setPosition(position[name]) ps = motor.getPositionSensor() ps.enable(self.timestep) obj[name] = motor ps_obj[name] = ps return obj, ps_obj def create_sensors(self, names, instant_func): obj = {} for name in names: sensor = instant_func(name) sensor.enable(self.timestep) obj[name] = sensor return obj def set_goal_position(self, arg): #x_range = np.linspace(0.4, 0.7, 2) #x = np.random.choice(x_range) #z_range = np.linspace(-0.25, 0.25, 3) #z = np.random.choice(z_range) # y_range = np.linspace(0.0, 0.3, 3) # y = np.random.choice(y_range) x = 0.5665 z = 0.0241 y = 0.66 # self.goal_position = [x, y, z] self.goal_position = self.robot.getFromDef('TARGET.tar').getPosition() if arg=='down': self.goal_position = [x, y, z] self.robot.getFromDef('TARGET').getField('translation').setSFVec3f(self.goal_position) self.goal_np_p = np.array(self.goal_position) # self.GOAL_P.setSFVec3f(self.goal_position) self.robot.step(self.timestep) def _get_image(self): return self.cameras["vr_camera_r"].getImage() def _get_obs(self): return self._get_image() def _get_motor_position(self): motor_position = [] for name in self.arm_motor_name: data = self.arm_ps[name].getValue() # normalized data = (data - self.act_mid[name])/self.act_rng[name] motor_position.append(data) return motor_position def _get_endpoint_orientation(self): orientation = np.array(self.ENDP.getOrientation()) # print(orientation[::4]) d = self.on_goal - orientation[::4] return orientation.tolist(), (d/2).tolist() def _get_state(self): state = [] '''Position''' elp, elbd = self._get_elbow_position() arm_p = self._get_motor_position() ep, ed = self._get_endpoint_position() ## to be modified : choose what to be our state # state += elp+ep+elbd+ed+[1. if self.on_goal else 0.] state += arm_p + ed '''Orientation''' # orient, d = self._get_endpoint_orientation() # state += orient+d+[1. if self.on_goal else 0.] return state def _get_elbow_position(self): goal = self.goal_np_p '''3D Position Distance''' ## to be modified : set elbow; why edge? elbow_global = np.array(self.P4.getPosition()) elbow_local = elbow_global - self.origin d = (goal - elbow_global) return (elbow_local/self.edge).tolist(), (d/self.edge).tolist() def _get_endpoint_position(self): goal = self.goal_np_p '''3D Position Distance''' end_global = np.array(self.ENDP.getPosition()) end_local = end_global - self.origin d = goal - end_global return (end_local/self.edge).tolist(), (d/self.edge).tolist() ## to be modified def _get_reward(self): '''Position Reward''' _, d = self._get_endpoint_position() d = np.array(d)*self.edge # d = np.array(d)*0.565*2 '''3D reward''' # r = -np.sqrt(d[0]**2+d[1]**2+d[2]**2) r = np.linalg.norm(self.prev_d) - np.linalg.norm(d) distance = np.linalg.norm(d) if distance < self.NEAR_DISTANCE : if self.left_bonus_count > 0 : bonus = self.BONUS_REWARD * (self.MAX_STEP - 2*self.finished_step) / self.MAX_STEP r += bonus self.left_bonus_count -= 1 print('bonus: ', bonus) self.on_goal += 1 print('on goal: ', self.on_goal) if self.on_goal >= self.ON_GOAL_FINISH_COUNT : self.done = True print('ON GOAL') else: self.on_goal = 0 print('on goal: 0') self.episode_len_reward += r self.orientation_reward = 1000*(self.beaker.getOrientation()[4] - 0.001) # print("ori: {}".format(self.beaker.getOrientation()[4])) print("ori_reward: {}".format(self.orientation_reward)) r += self.orientation_reward self.episode_ori_reward += self.orientation_reward self.accumulated_reward += r self.prev_d = d self.finished_step += 1 return r @property def obsrvation_space(self): return spaces.Box(low=0, high=255, shape=(self.camera_height, self.camera_width, 3), dtype=np.uint8) # for rgb def action_space(self): return spaces.Box(-1, 1, shape=(self.action_num, ), dtype=np.float32) def get_lives(self): return self.lives def getTime(self): return self.robot.getTime() def reset(self): fp = open("Episode-len-score.txt","a") fp.write(str(self.episode_len_reward)+'\n') fp.close() fp = open("Episode-orientation-score.txt","a") fp.write(str(self.episode_ori_reward)+'\n') fp.close() fp = open("Episode-score.txt","a") fp.write(str(self.accumulated_reward)+'\n') fp.close() self.robot.worldReload() def step(self, action_dict): reward = 0 action_dict = np.clip(action_dict, -1, 1) for name, a in zip(self.arm_motor_name, action_dict): self.arm_position[name] += a * self.dt if self.arm_position_range[name][0] > self.arm_position[name]: self.arm_position[name] = self.arm_position_range[name][0] elif self.arm_position_range[name][1] < self.arm_position[name]: self.arm_position[name] = self.arm_position_range[name][1] self.arm_motors[name].setPosition(self.arm_position[name]) self.robot.step(self.timestep) reward = self._get_reward() state = self._get_state() obs = False return obs, state, reward, self.done, {"goal_achieved":self.on_goal>0}
import numpy as np from bisect import insort_left import matplotlib.pyplot as plt import pandas as pd import matplotlib.pyplot as plt import pickle TIME_STEP = 48 supervisor = Supervisor() node = supervisor.getSelf() #get motors RightForelegJ1 = supervisor.getMotor('PRM:/r4/c1-Joint2:41') RightForelegJ2 = supervisor.getMotor('PRM:/r4/c1/c2-Joint2:42') RightForelegJ3 = supervisor.getMotor('PRM:/r4/c1/c2/c3-Joint2:43') RightHindlegJ1 = supervisor.getMotor('PRM:/r5/c1-Joint2:51') RightHindlegJ2 = supervisor.getMotor('PRM:/r5/c1/c2-Joint2:52') RightHindlegJ3 = supervisor.getMotor('PRM:/r5/c1/c2/c3-Joint2:53') LeftForelegJ1 = supervisor.getMotor('PRM:/r2/c1-Joint2:21') LeftForelegJ2 = supervisor.getMotor('PRM:/r2/c1/c2-Joint2:22') LeftForelegJ3 = supervisor.getMotor('PRM:/r2/c1/c2/c3-Joint2:23') LeftHindlegJ1 = supervisor.getMotor('PRM:/r3/c1-Joint2:31') LeftHindlegJ2 = supervisor.getMotor('PRM:/r3/c1/c2-Joint2:32') LeftHindlegJ3 = supervisor.getMotor('PRM:/r3/c1/c2/c3-Joint2:33')
""" PORTERÍA """ port = supervisor.getFromDef("Porteria") posPo = port.getField("translation") """ JUGADOR """ r = 0.0205 l = 0.0355 a = 0.0355 rP = 0.1 MAX_SPEED = 6.28 player = supervisor.getFromDef("Player") posP = player.getField("translation") orP = player.getField("rotation") # get a handler to the motors and set target position to infinity (speed control) leftMotor = supervisor.getMotor('left wheel motor') rightMotor = supervisor.getMotor('right wheel motor') leftMotor.setPosition(float('inf')) rightMotor.setPosition(float('inf')) leftMotor.setVelocity(0) rightMotor.setVelocity(0) ## Posiciones iniciales aleatorias ## sizeX = sizeVec[0] - 2*rP - 0.5 sizeY = sizeVec[1] - 2*rP - 0.5 # Pelota xB = random.random()*sizeX - sizeX/2 yB = random.random()*sizeY - sizeY/2 posB.setSFVec3f([xB, 0.05, yB])
class Botsy(): # Initalize the motors. def setup(self): self.robot = Supervisor() self.motor_left = self.robot.getMotor("motor_left") self.motor_right = self.robot.getMotor("motor_right") self.timestep = int(self.robot.getBasicTimeStep()) # Do one update step. Calls Webots' robot.step(). # Return True while simulation is running. # Return False if simulation is ended def step(self): return (self.robot.step(self.timestep) != -1) # Set the velocity of the motor [-1, 1]. def set_motor(self, motor, velocity): mult = 1 if velocity > 0 else -1 motor.setPosition(mult * float('+inf')) motor.setVelocity(velocity * motor.getMaxVelocity()) # Set the velocity of left and right motors [-1, 1]. def set_motors(self, left, right): self.set_left_motor(left) self.set_right_motor(right) # Set the velocity of left and right motors [-1, 1]. def turn_right(self, speed): self.set_left_motor(speed) self.set_right_motor(-speed) def turn_left(self, speed): self.set_left_motor(-speed) self.set_right_motor(speed) # Set the velocity of left motors [-1, 1]. def set_left_motor(self, velocity): self.set_motor(self.motor_left, velocity) # Set the velocity of right motors [-1, 1]. def set_right_motor(self, velocity): self.set_motor(self.motor_right, velocity) # Returns the current simulation time in seconds def get_time(self): return self.robot.getTime() # Returns the position of the robot in [x, z, angle] format # The coordinate system is as follows (top-down view) # .-------------------->x # |\ (angle) # | \ # | \ # | \ # | \ # | \ # | # V # z # def get_position(self): subject = self.robot.getSelf() position = subject.getPosition() orientation = subject.getOrientation() orientation = math.atan2(orientation[0], orientation[2]) return (position[0], position[2], orientation) # Returns the position of the green balls def get_green_balls(self): balls = [] balls_root = self.robot.getFromDef("_balls").getField("children") green_balls = balls_root.getMFNode(1).getField("children") for i in range(green_balls.getCount()): ball = green_balls.getMFNode(i) position = ball.getPosition() balls.append([position[0], position[1]]) # Returns the position of the green balls def get_yellow_balls(self): balls = [] balls_root = self.robot.getFromDef("_balls").getField("children") green_balls = balls_root.getMFNode(1).getField("children") yellow_balls = balls_root.getMFNode(0).getField("children") for i in range(yellow_balls.getCount()): ball = yellow_balls.getMFNode(i) position = ball.getPosition() balls.append([position[0], position[1]]) return balls
timestep = int(super_visor.getBasicTimeStep()) # 设置机器人的目标 target = torch.tensor([-0.5, 0.0, 0.0]).unsqueeze(0) # 获取机器人的node和field root = super_visor.getRoot() children = root.getField("children") n = children.getCount() robot_node = children.getMFNode(n - 1) translation_field = robot_node.getField("translation") rotation_field = robot_node.getField("rotation") # 初始化左右引擎 left_motor = super_visor.getMotor("left wheel motor") right_motor = super_visor.getMotor("right wheel motor") left_motor.setPosition(float("inf")) right_motor.setPosition(float("inf")) left_motor.setVelocity(0.0) right_motor.setVelocity(0.0) # 设置state, action和agent action_space = 2 state_space = 4 max_reward = torch.tensor([2.0]) agent = Agent(state_space, action_space) # 机器人开始运行 robot_name = robot_node.getDef() print("Robot {} starts!".format(robot_name))
ENCODER_UNIT = 159.23 INIT_X = 0.0 INIT_Y = 0.0 INIT_ANGLE = 0 PRED_STEPS = 450 correction_x = 0 correction_y = 0 correction_theta = 0 # create the Robot instance. robot = Supervisor() robot_sup = robot.getFromDef("e-puck") robot_trans = robot_sup.getField("translation") compass = robot.getCompass("compass") motorLeft = robot.getMotor("left wheel motor") motorRight = robot.getMotor("right wheel motor") positionLeft = robot.getPositionSensor("left wheel sensor") positionRight = robot.getPositionSensor("right wheel sensor") predictor = Predictor() timestep = int(robot.getBasicTimeStep()) x = [] y = [] theta = [] distance_sensors_info = [] x_odometry = []
class AutobinEnv(gym.Env): metadata = {'render.modes': ['human']} def __init__(self, time_step): # time step in ms self.time_step = time_step # supevisor node self.robot = Supervisor() # self node to get velocity and position self.bin = self.robot.getSelf() # cameras self.cameras = [] camerasNames = ['camera1', 'camera2'] for name in camerasNames: self.cameras.append(self.robot.getCamera(name)) self.cameras[-1].enable(self.time_step) self.img_size = (self.cameras[0].getWidth(), self.cameras[0].getHeight()) # wheels self.wheels = [] wheelsNames = ['wheel1', 'wheel2', 'wheel3', 'wheel4'] for name in wheelsNames: self.wheels.append(self.robot.getMotor(name)) self.wheels[-1].setPosition(float('inf')) self.wheels[-1].setVelocity(0.0) # ball self.ball = self.robot.getFromDef('Ball') def step(self, action): self.wheels[0].setVelocity(action[0]) self.wheels[1].setVelocity(action[1]) self.wheels[2].setVelocity(action[0]) self.wheels[3].setVelocity(action[1]) # step the world if self.robot.step(self.time_step) == -1: print('The world finishes!') return None imgs = [ Image.frombytes('RGB', self.img_size, cam.getImage()) for cam in self.cameras ] vel = self.bin.getVelocity() pos = self.bin.getPosition() done = False reward = 0 contact = self.ball.getNumberOfContactPoints() if contact > 0: vel_len = math.sqrt(vel[0] * vel[0] + vel[1] * vel[1]) if self.bin.getNumberOfContactPoints() > 0: reward = 20 - vel_len print( f'Autobin successfully catches the ball! Reward is {reward:.3f}' ) else: contact_pos = self.ball.getContactPoint(0) dx = contact_pos[0] - pos[0] dy = contact_pos[1] - pos[1] dist = math.sqrt(dx * dx + dy * dy) reward = -vel_len - dist print(f'Autobin misses ball! Reward is {reward:.3f}') self.reset() return imgs, reward, done, {'velocity': vel, 'position': pos} def reset(self): pos = self.ball.getField('translation') pos.setSFVec3f([0, 5, 0]) max_vel = 2 theta = random.random() * 2 * math.pi vel_x = max_vel * math.cos(theta) vel_y = max_vel * math.sin(theta) self.ball.setVelocity([vel_x, vel_y, 0, 0, 0, 0]) def render(self, mode='human'): pass
import struct import pickle import numpy as np import math import cv2 import os #robot = Robot() supervisor = Supervisor() connector = None timestep = 100 conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM) motors = [ supervisor.getMotor("shoulder_pan_joint"), supervisor.getMotor("shoulder_lift_joint"), supervisor.getMotor("elbow_joint"), supervisor.getMotor("wrist_1_joint"), supervisor.getMotor("wrist_2_joint"), supervisor.getMotor("wrist_3_joint") ] #motor_sensors = [robot.getPositionSensor("shoulder_pan_joint_sensor"), robot.getPositionSensor("shoulder_lift_joint_sensor"), robot.getPositionSensor("elbow_joint_sensor"), #robot.getPositionSensor("wrist_1_joint_sensor"), robot.getPositionSensor("wrist_2_joint_sensor"), robot.getPositionSensor("wrist_3_joint_sensor")] camera = supervisor.getCamera('cameraRGB') depth_camera = supervisor.getRangeFinder('cameraDepth') ur_node = supervisor.getFromDef('UR5') camera_transform_node = ur_node.getField('children').getMFNode(0) camera_node = camera_transform_node.getField('children').getMFNode(1) #depth_camera.enable(35)
#Enabling keyboard to control different funcitons keyboard = Keyboard() keyboard.enable(TIME_STEP) #initializing distanse sensors ds = [] dsNames = ['ds_right', 'ds_left', 'ds_back_left', 'ds_back_right'] for i in range(4): ds.append(supervisor.getDistanceSensor(dsNames[i])) ds[i].enable(TIME_STEP) #initializing wheels wheels = [] wheelsNames = ['wheel1', 'wheel2', 'wheel3', 'wheel4'] for i in range(4): wheels.append(supervisor.getMotor(wheelsNames[i])) wheels[i].setPosition(float('inf')) wheels[i].setVelocity(0.0) avoidObstacleCounter = 0 #initializing communications fasheqi = supervisor.getEmitter('emitter') fasheqi.setChannel(1) fasheqi.setRange(-1) #initializing IMU imu = supervisor.getInertialUnit('imu_angle') imu.enable(TIME_STEP) count = 0 #imu stuff pI = 3.14159265
class P7RLEnv(gym.Env): metadata = {'render.modes': ['human']} motors = [] timeStep = 32 restrictedGoals = True boxEnv = False isFixedGoal = False fixedGoal = [3.5, 0] logging = True maxTimestep = 20000 robotResetLocation = [0, 0, 0] def __init__(self): # Initialize TurtleBot specifics self.maxAngSpeed = 1 # 2.84 max self.maxLinSpeed = 0.14 # 0.22 max # Initialize reward parameters: self.moveTowardsParam = 1 self.moveAwayParam = 1.1 self.safetyLimit = 0.5 self.obsProximityParam = 0.01 self.faceObstacleParam = 10 # Total reward counter for each component self.moveTowardGoalTotalReward = 0 self.obsProximityTotalPunish = 0 self.faceObstacleTotalPunish = 0 # Initialize RL specific variables self.rewardInterval = 1 # How often do you want to get rewards self.epConc = '' # Conculsion of episode [Collision, Success, TimeOut] self.reward = 0 # Reward gained in the timestep self.totalreward = 0 # Reward gained throughout the episode self.counter = 0 # Timestep counter self.needReset = True # self.state = [] # State of the environment self.done = False # If the episode is done self.action = [0, 0] # Current action chosen self.prevAction = [0, 0] # Past action chosen self.goal = [] # Goal self.goals = [x / 100 for x in range(-450, 451, 150)] self.dist = 0 # Distance to the goal self.prevDist = 0 # Previous distance to the goal # Logging variables self.logDir = "" self.path = "" self.currentEpisode = 0 # Current episode number self.start = 0 # Timer for starting self.duration = 0 # Episode duration in seconds self.epInfo = [] # Logging info for the episode self.startPosition = [] self.prevMax = 0 # temporary variable # Initialize a supervisor self.supervisor = Supervisor() # Initialize robot self.robot = self.supervisor.getFromDef('TB') self.translationField = self.robot.getField('translation') self.orientationField = self.robot.getField('rotation') # Initialize lidar self.lidar = self.supervisor.getLidar('TurtleBotLidar') self.lidar.enable(self.timeStep) self.lidarRanges = [] # Initialize Motors self.motors.append(self.supervisor.getMotor("left wheel motor")) self.motors.append(self.supervisor.getMotor("right wheel motor")) self.motors[0].setPosition(float("inf")) self.motors[1].setPosition(float("inf")) self.motors[0].setVelocity(0.0) self.motors[1].setVelocity(0.0) self.direction = [] self.position = [] self.orientation = [] # Initialize Goal Object self.goalObject = self.supervisor.getFromDef('GOAL') self.goalTranslationField = self.goalObject.getField('translation') # Initialize action-space and observation-space self.action_space = spaces.Box( low=np.array([-self.maxLinSpeed, -self.maxAngSpeed]), high=np.array([self.maxLinSpeed, self.maxAngSpeed]), dtype=np.float32) self.observation_space = spaces.Box(low=-10, high=10, shape=(24, ), dtype=np.float16) def reset(self): if not self.logDir: self._getLogDirectory() self._startEpisode() # Reset evetything needs resetting self.supervisor.step(1) # Make it Happen self.goal = self._setGoal() # Set Goal self._resetObject(self.goalObject, [self.goal[0], 0.01, self.goal[1]]) self._getState() self.startPosition = self.position[:] self.state = [ self.prevAction[0], self.prevAction[1], self.dist, self.direction ] + self.lidarRanges # Set State return np.asarray(self.state) def step(self, action): self.prevAction = self.action[:] # Set previous action self.action = action # Take action self._take_action(action) self.supervisor.step(1) self.counter += 1 # Get State(dist from obstacle + lidar) and convert to numpyarray self._getState() # Observe new state self.state = np.asarray([ self.prevAction[0], self.prevAction[1], self.dist, self.direction ] + self.lidarRanges) self.reward = self._calculateReward() # get Reward self.done, extraReward = self._isDone( ) # determine if state is Done, extra reward/punishment self.reward += extraReward self.totalreward += self.reward return [self.state, self.reward, self.done, {}] def _trimLidarReadings(self, lidar): lidarReadings = [] new_lidars = [x if x != 0 else 3.5 for x in lidar] # Replace 0's with 3.5 (max reading) for x in range(0, 360, 18): end = x + 18 lidarReadings.append(min( new_lidars[x:end])) # Take the minimum of lidar Readings return lidarReadings def _resetObject(self, object, coordinates): object.getField('translation').setSFVec3f(coordinates) object.getField('rotation').setSFRotation([0, 1, 0, 0]) object.resetPhysics() def _setGoal(self): if not self.isFixedGoal: while True: if self.restrictedGoals: gs = random.sample(self.goals, 1) gs.append(random.randrange(-45, 45) / 10) g = gs[:] if np.random.randint(2) == 0 else [gs[1], gs[0]] elif self.boxEnv: xGoal = random.randrange( -40, -25) / 10 if np.random.randint( 2) == 0 else random.randrange(25, 40) / 10 yGoal = random.randrange(-40, 40) / 10 g = [yGoal, xGoal ] if np.random.randint(2) == 0 else [xGoal, yGoal] else: xGoal = random.randrange(-40, 40) / 10 yGoal = random.randrange(-40, 40) / 10 g = [xGoal, yGoal] pos1 = self.robot.getPosition()[0] pos2 = self.robot.getPosition()[2] distFromGoal = ((g[0] - pos1)**2 + (g[1] - pos2)**2)**0.5 if distFromGoal >= 1: break else: g = self.fixedGoal print('Goal set with Coordinates < {}, {} >'.format(g[0], g[1])) return g def _getDist(self): return ((self.goal[0] - self.position[0])**2 + (self.goal[1] - self.position[1])**2)**0.5 def _take_action(self, action): if self.logging: print( '\t\033[1mLinear velocity:\t{}\n\tAngular velocity:\t{}\n\033[0m' .format(action[0], action[1])) convertedVelocity = self.setVelocities(action[0], action[1]) self.motors[0].setVelocity(float(convertedVelocity[0])) self.motors[1].setVelocity(float(convertedVelocity[1])) def _isDone(self): if self.counter >= self.maxTimestep: # Check maximum timestep self.needReset = True self._endEpisode('timeout') return True, 0 minScan = min(list(filter(lambda a: a != 0, self.lidarRanges[:]))) # Check LiDar if minScan < 0.25: self.needReset = True self._endEpisode('collision') return True, -500 elif self.dist < 0.35: # Check Goal self.needReset = True if self.boxEnv else False self._endEpisode('success') return True, 500 else: return False, 0 def render(self, mode='human', close=False): pass def setVelocities(self, linearV, angularV): R = 0.033 # Wheel radius L = 0.138 # Wheelbase length vr = (2 * linearV + angularV * L) / (2 * R) vl = (2 * linearV - angularV * L) / (2 * R) #print('\nCommanded linear:\t{} angular:\t {}'.format(linearV, angularV)) #print('Calculated right wheel:\t{}, left wheel:\t {}'.format(vr, vl)) return [vl, vr] def _getDirection(self): # Get direction of goal from Robot robgoaly = self.goal[1] - self.position[1] robgoalx = self.goal[0] - self.position[0] goal_angle = math.atan2(robgoalx, robgoaly) # Get difference between goal direction and orientation heading = goal_angle - self.orientation if heading > pi: # Wrap around pi heading -= 2 * pi elif heading < -pi: heading += 2 * pi return heading def _calculateReward(self): action1 = np.round(self.action[0], 1) action2 = np.round(self.action[1], 1) if self.counter % self.rewardInterval != 0: return 0 # Check if it's Reward Time distRate = self.prevDist - self.dist if self.prevDist == 0: self.prevDist = self.dist return 0 moveTowardsGoalReward = self._rewardMoveTowardsGoal(distRate) self.moveTowardGoalTotalReward += moveTowardsGoalReward obsProximityPunish = self._rewardObstacleProximity() self.obsProximityTotalPunish += obsProximityPunish faceObstaclePunish = -self._rewardFacingObstacle() self.faceObstacleTotalPunish += faceObstaclePunish reward = moveTowardsGoalReward + obsProximityPunish + faceObstaclePunish #self._printMax(moveTowardsGoalReward) totalRewardDic = { "faceObstacleTotalPunish": self.faceObstacleTotalPunish, "obsProximityTotalPunish": self.obsProximityTotalPunish, "moveTowardGoalTotalReward": self.moveTowardGoalTotalReward } rewardDic = { "faceObstaclePunish": faceObstaclePunish, "obsProximityPunish": obsProximityPunish, "moveTowardsGoalReward": moveTowardsGoalReward, "Total": reward, "\033[1mTotalReward\033[0m": self.totalreward } if self.logging: self._printRewards(rewardDic) self._printRewards(totalRewardDic) self.prevDist = self.dist return reward def _rewardFacingObstacle(self): rewardFaceObs = 0 # Check forward lidars if moving forward, backward lidars if moving backward lidarRange = [-2, 2] if self.action[0] >= 0 else [7, 11] for i in range(lidarRange[0], lidarRange[1]): scale = 0.15 if i in [-2, 1, 7, 10 ] else 0.35 #Side readings get less weight rewardFaceObs += scale * ( 1 - (self.lidarRanges[i] / self.safetyLimit) ) if self.lidarRanges[i] < self.safetyLimit else 0 return self.faceObstacleParam * rewardFaceObs def _rewardMoveTowardsGoal(self, distRate): if distRate <= 0: return self.moveAwayParam * (distRate / 0.0044) else: return self.moveTowardsParam * (distRate / 0.0044) def _rewardObstacleProximity(self): closestObstacle = min(self.lidarRanges) if closestObstacle <= self.safetyLimit: return self.obsProximityParam * -( 1 - (closestObstacle / self.safetyLimit)) else: return 0 def SaveAndQuit(self): self._write_file('eps.txt', self.currentEpisode) with open(self.path + 'P7_NoObstacles.txt', 'a+') as f: print('\nSave and Quit \n') f.write(repr(self.epInfo)) f.write('\n') f.close() print('\tEpisodes Saved') self.supervisor.worldReload() def _write_file(self, filename, intval): with open(self.path + filename, 'w') as fp: print(f'Episodes saved: {intval}') fp.write(str(intval)) fp.close() def _read_file(self, filename): with open(self.path + filename) as fp: return int(fp.read()) def _startEpisode(self): if self.start != 0: self._logEpisode() # Read episode number if self.currentEpisode == 0 and os.path.exists(self.path + 'eps.txt'): self.currentEpisode = self._read_file('eps.txt') else: self.currentEpisode += 1 print('\n\t\t\t\t EPISODE No. {} \n'.format(self.currentEpisode)) self.start = time.time() if self.needReset: # Reset object(s) / counters / rewards print('Resetting Robot...') self._resetObject(self.robot, self.robotResetLocation) self.counter = 0 self.totalreward = 0 self.moveTowardGoalTotalReward = 0 self.obsProximityTotalPunish = 0 self.faceObstacleTotalPunish = 0 self.prevDist = 0 def _endEpisode(self, ending): self.epConc = ending prevMode = self.supervisor.simulationGetMode() self.supervisor.simulationSetMode(0) self.supervisor.exportImage( self.logDir + "screenshots/" + ending + "/" + str(self.currentEpisode) + ".jpg", 100) self.supervisor.simulationSetMode(prevMode) print(ending) def _getState(self): self.lidarRanges = self._trimLidarReadings( self.lidar.getRangeImage()) # Get LidarInfo #if self.logging: self._printLidarRanges() self.position = self.robot.getPosition( )[::2] # Get Position (x,y) and orientation self.orientation = self.orientationField.getSFRotation()[3] self.dist = self._getDist() # Get Eucledian distance from the goal self.direction = self._getDirection() def _printRewards(self, r, interval=1): if self.counter % interval == 0: print("\n\033[1mEpisode {}, timestep {}\033[0m".format( self.currentEpisode, self.counter)) for k, v in r.items(): print("\t" + str(k) + ":\t" + str(v)) def _printMax(self, reward): maxR = reward if self.prevMax < reward else self.prevMax self.prevMax = maxR print("max: {}".format(maxR)) def _printLidarRanges(self): lidarFormatted = [] for i in range(len(self.lidarRanges)): lidarFormatted.append(str(i) + ': ' + str(self.lidarRanges[i])) print(lidarFormatted) def _getLogDirectory(self): loggingDir = "Logging/" latestFile = "" latestMod = 0 for file in os.listdir(loggingDir): fileModTime = os.path.getmtime(loggingDir + file) if fileModTime > latestMod: latestMod = fileModTime latestFile = file self.logDir = loggingDir + latestFile + "/" self.path = self.logDir + "log/" def _logEpisode(self): self.duration = time.time() - self.start self.epInfo.append([ round(self.duration, 3), round(self.totalreward, 3), self.epConc, self.counter, "Goal:", self.goal, "Start position:", self.startPosition, "Rewards:", self.moveTowardGoalTotalReward, self.obsProximityTotalPunish, self.faceObstacleTotalPunish ])
import numpy as np import scipy.spatial.distance as distance # create the Robot instance. robot = Supervisor() # get the time step of the current world. timestep = int(robot.getBasicTimeStep()) robot_node = robot.getFromDef("burger") trans = robot_node.getField("translation") rot = robot_node.getField("rotation") goal_node = robot.getFromDef("goal") goal_trans = goal_node.getField('translation') leftmotor = robot.getMotor("left wheel motor") rightmotor = robot.getMotor("right wheel motor") leftmotor.setPosition(float("inf")) rightmotor.setPosition(float("inf")) leftmotor.setVelocity(0) rightmotor.setVelocity(0) MAX_SPEED = 2.84 def boundingangle(x): while (x > np.pi): x -= 2 * np.pi
class WebotsKukaEnv(gym.Env): metadata = {'render.modes': ['human']} def __init__(self): self._supervisor = Supervisor() self._timestep = 64 # List of objects to be taken into consideration self._objects_names = [] self._objects = {} # List of links name self._link_names = ["arm1", "arm2", "arm3", "arm4", "arm5", "finger1", "finger2"] # Get motors self._link_objects = {} for link in self._link_names: self._link_objects[link] = self._supervisor.getMotor(link) # Get sensors self._link_position_sensors = {} self._min_position = {} self._max_position = {} for link in self._link_names: self._link_position_sensors[link] = self._link_objects[link].getPositionSensor() self._link_position_sensors[link].enable(self._timestep) self._min_position[link] = self._link_objects[link].getMinPosition() self._max_position[link] = self._link_objects[link].getMaxPosition() self._supervisor.step(self._timestep) # self.camera = self._supervisor.getCamera("camera"); # self.camera.enable(self._timestep); ###### UTIL FUNCTIONS - START ###### def get_links_num(self): return len(self._link_names) def set_timestep_simulation(self, step): self._timestep = step def set_objects_names(self, names): self._objects_names = names for obj in self._objects_names: self._objects[obj] = self._supervisor.getFromDef(obj) def get_link_positions(self): positions = [] for link in self._link_names: positions.append(self._link_position_sensors[link].getValue()) return np.array(positions) def set_link_positions(self, positions): assert(len(positions)==len(self._link_names)) i = 0 for link in self._link_names: self._link_objects[link].setPosition(positions[i]) i = i + 1 def get_objects_positions(self): obj_positions = {} for obj in self._objects_names: obj_positions[obj] = self._objects[obj].getPosition() return obj_positions def get_state(self): state = [] for link in self._link_names: state = state + [self._link_position_sensors[link].getValue()] for obj in self._objects_names: state = state + self._objects[obj].getPosition() return np.array(state) ###### UTIL FUNCTIONS - END ###### ###### GYM FUNCTIONS - START ###### def step(self, action): new_state = [] reward = 0 done = False obs = [] self.set_link_positions(action) self._supervisor.step(self._timestep) return new_state, reward, done, obs def reset(self): print("Reset") self._supervisor.simulationReset() self._supervisor.simulationResetPhysics() self._supervisor.step(1) for link in self._link_names: self._link_position_sensors[link].enable(self._timestep) def render(self, mode='human', close=False): pass
class P9RLEnv(gym.Env): metadata = {'render.modes': ['human']} motors = [] timeStep = 1 restrictedGoals = True boxEnv = False isFixedGoal = False fixedGoal = [3.5, 0] logging = False maxTimestep = 7000 robotResetLocation = [0, 0, 0] def __init__(self): # Initialize TurtleBot specifics self.maxAngSpeed = 1.5 # 2.84 max self.maxLinSpeed = 0.1 # 0.22 max # Initialize reward parameters: self.moveTowardsParam = 1 self.moveAwayParam = 1.1 self.safetyLimit = 0.75 self.obsProximityParam = 1 self.EOEPunish = -300 self.EOEReward = 600 self.EOERewardCounter = 0 self.EpisodeCounter = 0 # Total reward counter for each component self.moveTowardGoalTotalReward = 0 self.obsProximityTotalPunish = 0 # Initialize RL specific variables self.rewardInterval = 1 # How often do you want to get rewards self.epConc = '' # Conculsion of episode [Collision, Success, TimeOut] self.reward = 0 # Reward gained in the timestep self.totalreward = 0 # Reward gained throughout the episode self.counter = 0 # Timestep counter self.needReset = True # self.state = [] # State of the environment self.done = False # If the episode is done self.action = [0, 0] # Current action chosen self.prevAction = [0, 0] # Past action chosen self.goal = [] # Goal self.goals = [x / 100 for x in range(-450, 451, 150)] #self.goals = [[0.72, 6.78],[5.03, 6.78],[12.7, 6.78], [10.9, -1.68], [3, 0],[-11.5, -2.5]] self.seeded = False self.dist = 0 # Distance to the goal self.prevDist = 0 # Previous distance to the goal # Logging variables self.currentEpisode = 0 # Current episode number self.start = 0 # Timer for starting self.duration = 0 # Episode duration in seconds self.epInfo = [] # Logging info for the episode self.startPosition = [] self.prevMax = 0 # temporary variable # Initialize a supervisor self.supervisor = Supervisor() # Initialize robot self.robot = self.supervisor.getFromDef('TB') self.translationField = self.robot.getField('translation') self.orientationField = self.robot.getField('rotation') # Initialize lidar self.lidar = self.supervisor.getLidar('LDS-01') self.lidarDiscretization = 30 self.lidar.enable(self.timeStep) self.lidarRanges = [] # Initialize Motors self.motors.append(self.supervisor.getMotor("right wheel motor")) self.motors.append(self.supervisor.getMotor("left wheel motor")) self.motors[0].setPosition(float("inf")) self.motors[1].setPosition(float("inf")) self.motors[0].setVelocity(0.0) self.motors[1].setVelocity(0.0) self.direction = [] self.position = [] self.orientation = [] # Initialize Goal Object self.goalObject = self.supervisor.getFromDef('GOAL') self.goalTranslationField = self.goalObject.getField('translation') # Initialize action-space and observation-space self.action_space = spaces.Box(low=np.array( [-self.maxLinSpeed, -self.maxAngSpeed]), high=np.array([0, self.maxAngSpeed]), dtype=np.float32) #TODO: MAKE POSTIVE LINEAR SPEED A FORWARD MOTION self.observation_space = spaces.Box(low=-10, high=10, shape=(14, ), dtype=np.float16) self.counter = 0 def reset(self): self._startEpisode() # Reset evetything needs resetting self.EpisodeCounter = self.EpisodeCounter + 1 print(self.EpisodeCounter) if not self.seeded: random.seed(self.currentEpisode) self.seeded = True # Make it Happen self.supervisor.step(self.timeStep) self.goal = self._setGoal() # Set Goal self._resetObject(self.goalObject, [self.goal[0], 0.01, self.goal[1]]) self._getState() self.startPosition = self.position[:] self.state = [self.dist, self.direction ] + self.lidarRanges # Set State #self.state = [self.dist, self.direction] + self.lidarRanges # Set State return np.asarray(self.state) def step(self, action): self.action = action # Take action self._take_action(action) self.counter += 1 #sself.supervisor.simulationSetMode(3) self._getState() # Observe new state self.state = np.asarray([self.dist, self.direction] + self.lidarRanges) self.prevAction = self.action[:] # Set previous action # Get State(dist from obstacle + lidar) and convert to numpyarray #print(self.lidarRanges[0]) self.supervisor.step(32) self.reward = self._calculateReward() # get Reward self.done, extraReward = self._isDone( ) # determine if state is Done, extra reward/punishment self.reward += extraReward self.totalreward += self.reward return [self.state, self.reward, self.done, {}] def _trimLidarReadings(self, lidar): lidarReadings = [] new_lidars = [x if x != 0 else 3.5 for x in lidar] # Replace 0's with 3.5 (max reading) for x in range(0, 360, self.lidarDiscretization): end = x + self.lidarDiscretization lidarReadings.append(min( new_lidars[x:end])) # Take the minimum of lidar Readings return lidarReadings def _resetObject(self, object, coordinates): object.getField('translation').setSFVec3f(coordinates) object.getField('rotation').setSFRotation([0, 1, 0, 0]) object.resetPhysics() def _setGoal(self): if len(self.goals) == 6: return random.choice(self.goals) if not self.isFixedGoal: while True: if self.restrictedGoals: gs = random.sample(self.goals, 1) gs.append(random.randrange(-45, 45) / 10) g = gs[:] if np.random.randint(2) == 0 else [gs[1], gs[0]] elif self.boxEnv: xGoal = random.randrange( -40, -25) / 10 if np.random.randint( 2) == 0 else random.randrange(25, 40) / 10 yGoal = random.randrange(-40, 40) / 10 g = [yGoal, xGoal ] if np.random.randint(2) == 0 else [xGoal, yGoal] else: xGoal = random.randrange(-40, 40) / 10 yGoal = random.randrange(-40, 40) / 10 g = [xGoal, yGoal] pos1 = self.robot.getPosition()[0] pos2 = self.robot.getPosition()[2] distFromGoal = ((g[0] - pos1)**2 + (g[1] - pos2)**2)**0.5 if distFromGoal >= 1: break else: g = self.fixedGoal print('Goal set with Coordinates < {}, {} >'.format(g[0], g[1])) return g def _getDist(self): return ((self.goal[0] - self.position[0])**2 + (self.goal[1] - self.position[1])**2)**0.5 def _take_action(self, action): if self.logging: print( '\t\033[1mLinear velocity:\t{}\n\tAngular velocity:\t{}\n\033[0m' .format(action[0], action[1])) convertedVelocity = self.setVelocities(action[0], action[1]) self.motors[0].setVelocity(float(convertedVelocity[0])) self.motors[1].setVelocity(float(convertedVelocity[1])) def _isDone(self): if self.counter >= self.maxTimestep: # Check maximum timestep self.needReset = True return True, self.EOEPunish minScan = min(list(filter(lambda a: a != 0, self.lidarRanges[:]))) # Check LiDar if minScan < 0.2: self.needReset = True return True, self.EOEPunish elif self.dist < 0.35: # Check Goal self.needReset = True if self.boxEnv else False self.EOERewardCounter = self.EOERewardCounter + 1 print("SUCCESS COUNTER:", self.EOERewardCounter) return True, self.EOEReward else: return False, 0 def render(self, mode='human', close=False): pass def setVelocities(self, linearV, angularV): R = 0.033 # Wheel radius L = 0.138 # Wheelbase length # vr = (((2 * linearV) + (angularV * L)) / (2 * R)) vr = ((linearV + (L / 2) * angularV)) / R vl = ((linearV - (L / 2) * angularV)) / R return [vl, vr] def _getDirection(self): # Get direction of goal from Robot robgoaly = self.goal[1] - self.position[1] robgoalx = self.goal[0] - self.position[0] goal_angle = math.atan2(robgoalx, robgoaly) # Get difference between goal direction and orientation heading = goal_angle - self.orientation if heading > pi: # Wrap around pi heading -= 2 * pi elif heading < -pi: heading += 2 * pi return heading def _calculateReward(self): action1 = np.round(self.action[0], 1) action2 = np.round(self.action[1], 1) if self.counter % self.rewardInterval != 0: return 0 # Check if it's Reward Time distRate = self.prevDist - self.dist if self.prevDist == 0: self.prevDist = self.dist return 0 moveTowardsGoalReward = self._rewardMoveTowardsGoal(distRate) self.moveTowardGoalTotalReward += moveTowardsGoalReward obsProximityPunish = self._rewardObstacleProximity() self.obsProximityTotalPunish += obsProximityPunish reward = moveTowardsGoalReward + obsProximityPunish * 3 totalRewardDic = { "faceObstacleTotalPunish": self.faceObstacleTotalPunish, "obsProximityTotalPunish": self.obsProximityTotalPunish, "moveTowardGoalTotalReward": self.moveTowardGoalTotalReward } if self.logging: self._printRewards(totalRewardDic) print(reward) self.prevDist = self.dist return reward def _rewardMoveTowardsGoal(self, distRate): if distRate <= 0: return self.moveAwayParam * (distRate / 0.0044) else: return self.moveTowardsParam * (distRate / 0.0044) def _rewardObstacleProximity(self): closestObstacle = min(self.lidarRanges) if closestObstacle <= self.safetyLimit: return self.obsProximityParam * -( 1 - (closestObstacle / self.safetyLimit)) else: return 0 def _startEpisode(self): if self.needReset: # Reset object(s) / counters / rewards print('Resetting Robot...') self._resetObject(self.robot, self.robotResetLocation) self.counter = 0 self.totalreward = 0 self.moveTowardGoalTotalReward = 0 self.obsProximityTotalPunish = 0 self.faceObstacleTotalPunish = 0 self.prevDist = 0 def _getState(self): self.lidarRanges = self._trimLidarReadings( self.lidar.getRangeImage()) # Get LidarInfo self.position = self.robot.getPosition( )[::2] # Get Position (x,y) and orientation self.orientation = self.orientationField.getSFRotation()[3] self.dist = self._getDist() # Get Eucledian distance from the goal self.direction = self._getDirection() def _printRewards(self, r, interval=1): if self.counter % interval == 0: print("\n\033[1mEpisode {}, timestep {}\033[0m".format( self.currentEpisode, self.counter)) for k, v in r.items(): print("\t" + str(k) + ":\t" + str(v)) def _printMax(self, reward): maxR = reward if self.prevMax < reward else self.prevMax self.prevMax = maxR print("max: {}".format(maxR)) def _printLidarRanges(self): lidarFormatted = [] for i in range(len(self.lidarRanges)): lidarFormatted.append(str(i) + ': ' + str(self.lidarRanges[i])) print(lidarFormatted) def _logEpisode(self): self.duration = time.time() - self.start self.epInfo.append([ round(self.duration, 3), round(self.totalreward, 3), self.epConc, self.counter, "Goal:", self.goal, "Start position:", self.startPosition, "Rewards:", self.moveTowardGoalTotalReward, self.obsProximityTotalPunish, self.faceObstacleTotalPunish ])
class WebotsKukaEnv(gym.Env): metadata = {'render.modes': ['human']} def __init__(self): self.alpha = alpha self.beta = beta self.gamma = gamma self.desired_pos = np.array([-0.15, 0.185, 0]) self.objects_initial_positions = { "ring": [0.53, 0.015, 0], "ball": [0.4, 0.025, 0], "box": [0.5, 0.02, 0] } self._supervisor = Supervisor() self._timestep = 32 self._touch = 0 self._num_rew = 0 # List of objects to be taken into consideration self._objects_names = [] self._objects = {} # Get Fingers self.finger_names = ["TouchSensor1", "TouchSensor2"] self.fingers = {} for fin in self.finger_names: self.fingers[fin] = self._supervisor.getFromDef(fin) # List of links name self._link_names = [ "arm1", "arm2", "arm3", "arm4", "arm5", "finger1", "finger2" ] # Get motors self._link_objects = {} for link in self._link_names: self._link_objects[link] = self._supervisor.getMotor(link) # Get Touch sensor (Added) self._touch_sensor_names = ["touch sensor1", "touch sensor2"] #Added self._touch_sensor_object = {} for sensor in self._touch_sensor_names: self._touch_sensor_object[ sensor] = self._supervisor.getTouchSensor(sensor) self._touch_sensor_object[sensor].enable(self._timestep) # Get sensors self._link_position_sensors = {} self._min_position = {} self._max_position = {} for link in self._link_names: self._link_position_sensors[link] = self._link_objects[ link].getPositionSensor() self._link_position_sensors[link].enable(self._timestep) self._min_position[link] = self._link_objects[link].getMinPosition( ) self._max_position[link] = self._link_objects[link].getMaxPosition( ) self._supervisor.step(self._timestep) # self.camera = self._supervisor.getCamera("camera"); # self.camera.enable(self._timestep); self._floor = 0 self._finger = 0 self._touch = 0 self._i = 0 ###### UTIL FUNCTIONS - START ###### def get_links_num(self): return len(self._link_names) def set_timestep_simulation(self, step): self._timestep = step def set_objects_names(self, names): self._objects_names = names for obj in self._objects_names: self._objects[obj] = self._supervisor.getFromDef(obj) def set_finger_name(self, name): self.finger_names = name self.finger = self._supervisor.getFromDef(name) def get_link_positions(self): positions = [] for link in self._link_names: positions.append(self._link_position_sensors[link].getValue()) return np.array(positions) def set_link_positions(self, positions): assert (len(positions) == len(self._link_names)) i = 0 for link in self._link_names: self._link_objects[link].setPosition(positions[i]) i = i + 1 def get_objects_positions(self): obj_positions = {} for obj in self._objects_names: obj_positions[obj] = self._objects[obj].getPosition() return obj_positions def object_position(self): obj_position = self._objects[self._objects_names[0]].getPosition() return obj_position def get_state(self): state = [] for link in self._link_names: state = state + [self._link_position_sensors[link].getValue()] for obj in self._objects_names: state = state + self._objects[obj].getPosition() return np.array(state) def object_finger_distance(self, object_name): finger_pos_00 = self.fingers["TouchSensor1"].getPosition() finger_pos_00_array = np.array(finger_pos_00) finger_pos_01 = self.fingers["TouchSensor2"].getPosition() finger_pos_01_array = np.array(finger_pos_01) obj_pos = self.get_objects_positions()[object_name] obj_pos_array = np.array(obj_pos) if (object_name == "ring" or object_name == "box"): finger_distances = [ np.linalg.norm(finger_pos_00_array - obj_pos_array + [0, 0, +0.019]), np.linalg.norm(finger_pos_01_array - obj_pos_array + [0, 0, 0.019]) ] else: finger_distances = [ np.linalg.norm(finger_pos_00_array - obj_pos_array + [0, 0, +0]), np.linalg.norm(finger_pos_01_array - obj_pos_array + [0, 0, 0]) ] return np.array(finger_distances).mean() def randomizeEnvironment(self): object_notToGrasp = self._supervisor.getFromDef(self._objects_names[1]) _translation = object_notToGrasp.getField("translation") array_pos = [] array_pos.append(object_notToGrasp.getPosition()) array_pos.append([1, 0, 1]) array_pos.append([-0.15, 0.195, 0]) rand = randrange(0, 3) new_position = array_pos[rand] _translation.setSFVec3f(new_position) ###### UTIL FUNCTIONS - END ###### ###### GYM FUNCTIONS - START ###### def step(self, action): new_state = [] reward = 0 done = False obs = [] obs = self._get_obs() reward = self._compute_reward(obs, done) self.set_link_positions(action) self._supervisor.step(self._timestep) return new_state, reward, done, obs def _compute_reward(self, observations, done): sensor_mean = (observations["TOUCH_SENSORS"] / 200).mean() obj_pos = observations["OBJECT_POSITION"] floor_distance = self.alpha * np.exp( -(np.linalg.norm(obj_pos - self.desired_pos)**2) / (2 * dist_dev_alpha**2)) finger_distance = self.beta * np.exp(-( (self.object_finger_distance(self._objects_names[0]) + 0.9)**2) / (2 * dist_dev_beta**2)) touch = self.gamma * sensor_mean touch_distance = touch * np.exp( -(self.object_finger_distance(self._objects_names[0])**2) / (2 * dist_dev_gamma**2)) self._num_rew += 1 self._touch += touch_distance #reward = floor_distance #Per spostamento reward = finger_distance + touch_distance #Per presa self._floor += floor_distance self._finger += finger_distance self._touch += touch_distance self._i += 1 ''' print("------------------------") print("Floor Distance: ") print(floor_distance) print("Finger Distance") print(finger_distance) print("Reward: ") print(reward) print("------------------------") ''' # print( # "Dfl: %-10.8f Dfi: %-10.8f D*T: %-10.8f" # % (floor_distance, finger_distance, touch_distance) # ) return reward def _get_obs(self): joint_positions = self.get_link_positions() touch_sensors = np.array([ np.linalg.norm(self._touch_sensor_object[sensor].getValues()) for sensor in self._touch_sensor_names ]) obj = self.object_position() obj_position = np.array(obj) obs = { "JOINT_POSITIONS": joint_positions, "TOUCH_SENSORS": touch_sensors, "OBJECT_POSITION": obj_position, } return obs def reset(self): #print("Reset") #@ if (self._i != 0): print("floor: ", self._floor, " finger: ", self._finger, " touch: ", self._touch) self._i = 0 self._floor = 0 self._finger = 0 self._touch = 0 self._supervisor.simulationReset() self._supervisor.simulationResetPhysics() self._supervisor.step(1) #self.randomizeEnvironment() for link in self._link_names: self._link_position_sensors[link].enable(self._timestep) for sensor in self._touch_sensor_names: self._touch_sensor_object[sensor].enable(self._timestep) def render(self, mode='human', close=False): pass ###### GYM FUNCTIONS - END ###### ###### CLASSIFIER FUNCITIONS - START ###### def _objectPositionClassifier(self, object_position, object_initial_position): object_position = np.array(object_position) object_initial_position = np.array(object_initial_position) diff = np.linalg.norm(object_position - object_initial_position) return diff <= 0.001 def _fingerDistanceClassifier(self, finger_distance): return finger_distance <= 0.05 def _desiredPosClassifier(self, object_position, desired_pos): diff = np.linalg.norm(object_position - desired_pos) return diff <= 0.03 def _touchSensorsClassifier(self, touch_sensors): esito = True for touch_sensor in touch_sensors: esito = esito and (touch_sensor >= 300) return esito def _jointPositionClassifier(self, joint_positions): esito = True for joint_position in joint_positions: esito = esito and (abs(joint_position) <= 0.01) return esito def _getValuesFromSensors(self): obs = self._get_obs() values = {} i = 0 obj_names = self._objects_names.copy() obj_names.sort() for obj_name in self._objects_names: values[i] = self._objectPositionClassifier( self.get_objects_positions()[obj_name], self.objects_initial_positions[obj_name]) values[i + 1] = self._fingerDistanceClassifier( self.object_finger_distance(obj_name)) values[i + 2] = self._desiredPosClassifier( self.get_objects_positions()[obj_name], self.desired_pos) i += 3 values[i] = self._touchSensorsClassifier(obs["TOUCH_SENSORS"]) values[i + 1] = self._jointPositionClassifier( self.get_link_positions()) return values
class UniversalController: def __init__(self, network, robot_name="standard", target_name="TARGET", num_of_inputs=4, eval_run_time=100, time_step=32): self.neural_network = network self.TIME_STEP = time_step #standard time step is 32 or 64 self.supervisor = Supervisor() self.inputs = num_of_inputs self.solution_threshold = 0.95 #call methods functions to intialise robot and target self.get_and_set_robot(robot_name) self.get_and_set_target(target_name) self.eval_run_time = eval_run_time #default to 1m x 1m space but can be edited directly or using method below self.max_distance = 1.4142 self.data_reporting = DataReporting() def get_and_set_robot(self, robot_name): # initialise robot node and components self.robot_node = self.supervisor.getFromDef( robot_name) #TARGET ROBOT MUST BE NAMED IN DEF FIELD self.robot_trans = self.robot_node.getField("translation") self.robot_rotation = self.robot_node.getField("rotation") #call start location function to store initial position self.get_and_set_robotStart() #call motors function self.get_and_set_motors() def get_and_set_robotStart(self): #establish robot starting position self.robot_starting_location = self.robot_trans.getSFVec3f() self.robot_starting_rotation = self.robot_rotation.getSFRotation() '''EDIT HERE TO EXPAND TO DIFFERENT NUMBERS OF WHEELS/MOTORS''' def get_and_set_motors(self): self.motors = [] self.motor_max_Vs = [] #get robot motors - currently works for all two wheel motor morphologies self.left_motor = self.supervisor.getMotor('left wheel motor') self.right_motor = self.supervisor.getMotor('right wheel motor') self.left_motor.setPosition(float('inf')) self.right_motor.setPosition(float('inf')) #get the max velocity each motor is capable of and set the max velocity self.left_motor_max = self.left_motor.getMaxVelocity() self.right_motor_max = self.right_motor.getMaxVelocity() #append necessary additional motors and max velocities here after enabling as above self.motors.append(self.left_motor) self.motors.append(self.right_motor) self.motor_max_Vs.append(self.left_motor_max) self.motor_max_Vs.append(self.right_motor_max) def get_and_set_target(self, target_name): #get target location self.target = self.supervisor.getFromDef( target_name) #TARGET MUST BE NAMED IN DEF FIELD #get and set the translation and location for retrieval when needed self.target_trans = self.target.getField("translation") self.target_location = self.target_trans.getSFVec3f() def set_dimensions(self, space_dimensions): #basic pythagorean calculation to find max distance possible in square space self.max_distance = np.sqrt(space_dimensions[0]**2 + space_dimensions[1]**2) #print(self.max_distance) def set_distance_sensors(self, distance_sensors): #takes in array of strings - names of distance sensors self.distance_sensors = [] self.min_DS_values = [] self.DS_value_range = [] for i in range(len(distance_sensors)): #set and enable each sensor self.distance_sensors.append( self.supervisor.getDistanceSensor(distance_sensors[i])) self.distance_sensors[i].enable(self.TIME_STEP) #get and store the min reading value of each sensor self.min_DS_values.append(self.distance_sensors[i].getMinValue()) #get and store the possible value range of each sensor self.DS_value_range.append(self.distance_sensors[i].getMaxValue() - self.min_DS_values[i]) #print(self.DS_value_range) def get_DS_values(self): #get distance sensor values values = [] for i in range(len(self.distance_sensors)): value = self.distance_sensors[i].getValue() #value = value/self.maxDSReading #practical max value value = value - self.min_DS_values[i] if value < 0.0: value = 0.0 #to account for gaussian noise value = value / (self.DS_value_range[i]) #account for gaussian noise providing higher than max reading if value > 1.0: value = 1.0 values.append(value) #return a list of the normalised sensor readings return values def compute_motors(self, DS_values): #get the outputs of the neural network and convert into wheel motor speeds #already fully flexible for multiple motors results = self.neural_network.forwardPass(DS_values) for i in range(len(self.motors)): self.motors[i].setVelocity(results[i] * self.motor_max_Vs[i]) def reset_all_physics(self): #reset robot physics and return to starting translation ready for next run self.robot_rotation.setSFRotation(self.robot_starting_rotation) self.robot_trans.setSFVec3f(self.robot_starting_location) self.robot_node.resetPhysics() '''SIMULATION FUNCTION - can also be used directly for manual DataVisualisationandTesting of weight arrays (to manually repeat successful solutions etc.)''' def evaluate_robot(self, individual, map_elites=False, all_novelty=False): self.neural_network.decodeEA( individual) #individual passed from algorithm #note simulator start time t = self.supervisor.getTime() if map_elites or all_novelty: velocity = [] angular_V = [] #run simulation for eval_run_time seconds while self.supervisor.getTime() - t < self.eval_run_time: #calculate the motor speeds from the sensor readings self.compute_motors(self.get_DS_values()) #check current objective fitness current_fit = self.calculate_fitness() if map_elites or all_novelty: currentV = self.robot_node.getVelocity() velocity.append( np.sqrt((currentV[0]**2) + (currentV[1]**2) + (currentV[2]**2))) angular_V.append(np.sqrt(currentV[4]**2)) #break if robot has reached the target if current_fit > self.solution_threshold: time_taken = self.supervisor.getTime() - t #safety measure due to time_step and thread lag if time_taken > 0.0: #then a legitimate solution fit = self.calculate_fitness() break if self.supervisor.step(self.TIME_STEP) == -1: quit() #Get only the X and Y coordinates to create the endpoint vector endpoint = self.robot_trans.getSFVec3f()[0:3:2] distance_FS = np.sqrt( (endpoint[0] - self.robot_starting_location[0])**2 + (endpoint[1] - self.robot_starting_location[2])**2) #reset the simulation self.reset_all_physics() #find fitness fit = self.calculate_fitness() if map_elites: average_velocity = np.average(velocity) #average_angular_V = np.average(angular_V) return average_velocity, distance_FS, fit if all_novelty: average_velocity = np.average(velocity) median_velocity = np.median(velocity) #print("fit = " + str(fit) + ", endpoint = " + str(endpoint[0]) + "," + str(endpoint[1]) + ", AV = " + str(average_velocity) + ", distanceFS = " + str(distance_FS)) return fit, endpoint, average_velocity, distance_FS, median_velocity return fit, endpoint def calculate_fitness(self): values = self.robot_trans.getSFVec3f() distance_from_target = np.sqrt( (self.target_location[0] - values[0])**2 + (self.target_location[2] - values[2])**2) fit = 1.0 - (distance_from_target / self.max_distance) return fit def sigma_test(self, my_EA, upper_limit=1.0, lower_limit=0.1): self.data_reporting.sigma_test(my_EA, upper_limit, lower_limit) def algorithm_test(self, my_EA, generations, total_runs, nipes=False, map_elites=False): self.data_reporting.algorithm_test(my_EA, generations, total_runs, nipes, map_elites) def control_group_test(self, generations=10000, total_runs=1): self.data_reporting.control_group_test( individual_size=self.neural_network.solution_size, eval_function=self.evaluate_robot, generations=generations, total_runs=total_runs)
class DarwinController: def __init__(self, namespace='', node=True): self.time = 0 self.clock_msg = Clock() self.namespace = namespace self.supervisor = Supervisor() self.motor_names = [ "ShoulderR", "ShoulderL", "ArmUpperR", "ArmUpperL", "ArmLowerR", "ArmLowerL", "PelvYR", "PelvYL", "PelvR", "PelvL", "LegUpperR", "LegUpperL", "LegLowerR", "LegLowerL", "AnkleR", "AnkleL", "FootR", "FootL", "Neck", "Head" ] self.walkready = [0] * 20 self.names_webots_to_bitbots = { "ShoulderR": "RShoulderPitch", "ShoulderL": "LShoulderPitch", "ArmUpperR": "RShoulderRoll", "ArmUpperL": "LShoulderRoll", "ArmLowerR": "RElbow", "ArmLowerL": "LElbow", "PelvYR": "RHipYaw", "PelvYL": "LHipYaw", "PelvR": "RHipRoll", "PelvL": "LHipRoll", "LegUpperR": "RHipPitch", "LegUpperL": "LHipPitch", "LegLowerR": "RKnee", "LegLowerL": "LKnee", "AnkleR": "RAnklePitch", "AnkleL": "LAnklePitch", "FootR": "RAnkleRoll", "FootL": "LAnkleRoll", "Neck": "HeadPan", "Head": "HeadTilt" } self.names_bitbots_to_webots = { "RShoulderPitch": "ShoulderR", "LShoulderPitch": "ShoulderL", "RShoulderRoll": "ArmUpperR", "LShoulderRoll": "ArmUpperL", "RElbow": "ArmLowerR", "LElbow": "ArmLowerL", "RHipYaw": "PelvYR", "LHipYaw": "PelvYL", "RHipRoll": "PelvR", "LHipRoll": "PelvL", "RHipPitch": "LegUpperR", "LHipPitch": "LegUpperL", "RKnee": "LegLowerR", "LKnee": "LegLowerL", "RAnklePitch": "AnkleR", "LAnklePitch": "AnkleL", "RAnkleRoll": "FootR", "LAnkleRoll": "FootL", "HeadPan": "Neck", "HeadTilt": "Head" } self.motors = [] self.sensors = [] self.timestep = int(self.supervisor.getBasicTimeStep()) self.timestep = 10 for motor_name in self.motor_names: self.motors.append(self.supervisor.getMotor(motor_name)) self.motors[-1].enableTorqueFeedback(self.timestep) self.sensors.append( self.supervisor.getPositionSensor(motor_name + "S")) self.sensors[-1].enable(self.timestep) self.accel = self.supervisor.getAccelerometer("Accelerometer") self.accel.enable(self.timestep) self.gyro = self.supervisor.getGyro("Gyro") self.gyro.enable(self.timestep) self.camera = self.supervisor.getCamera("Camera") self.camera.enable(self.timestep) if node: rospy.init_node("webots_darwin_ros_interface", anonymous=True, argv=['clock:=/' + self.namespace + '/clock']) self.pub_js = rospy.Publisher(self.namespace + "/joint_states", JointState, queue_size=1) self.pub_imu = rospy.Publisher(self.namespace + "/imu/data", Imu, queue_size=1) self.pub_cam = rospy.Publisher(self.namespace + "/image_raw", Image, queue_size=1) self.clock_publisher = rospy.Publisher(self.namespace + "/clock", Clock, queue_size=1) rospy.Subscriber(self.namespace + "/DynamixelController/command", JointCommand, self.command_cb) self.world_info = self.supervisor.getFromDef("world_info") self.hinge_joint = self.supervisor.getFromDef("barrier_hinge") self.robot_node = self.supervisor.getFromDef("Darwin") self.translation_field = self.robot_node.getField("translation") self.rotation_field = self.robot_node.getField("rotation") def step_sim(self): self.supervisor.step(self.timestep) def step(self): self.step_sim() self.time += self.timestep / 1000 self.publish_imu() self.publish_joint_states() self.clock_msg.clock = rospy.Time.from_seconds(self.time) self.clock_publisher.publish(self.clock_msg) def command_cb(self, command: JointCommand): for i, name in enumerate(command.joint_names): try: motor_index = self.motor_names.index( self.names_bitbots_to_webots[name]) self.motors[motor_index].setPosition(command.positions[i]) except ValueError: print( f"invalid motor specified ({self.names_bitbots_to_webots[name]})" ) self.publish_joint_states() self.publish_imu() self.publish_camera() def publish_joint_states(self): js = JointState() js.name = [] js.header.stamp = rospy.get_rostime() js.position = [] js.effort = [] for i in range(len(self.sensors)): js.name.append(self.names_webots_to_bitbots[self.motor_names[i]]) value = self.sensors[i].getValue() js.position.append(value) js.effort.append(self.motors[i].getTorqueFeedback()) self.pub_js.publish(js) def publish_imu(self): msg = Imu() msg.header.stamp = rospy.get_rostime() msg.header.frame_id = "imu_frame" accel_vels = self.accel.getValues() msg.linear_acceleration.x = ((accel_vels[0] - 512.0) / 512.0) * 3 * G msg.linear_acceleration.y = ((accel_vels[1] - 512.0) / 512.0) * 3 * G msg.linear_acceleration.z = ((accel_vels[2] - 512.0) / 512.0) * 3 * G gyro_vels = self.gyro.getValues() msg.angular_velocity.x = ((gyro_vels[0] - 512.0) / 512.0) * 1600 * ( math.pi / 180) # is 400 deg/s the real value msg.angular_velocity.y = ( (gyro_vels[1] - 512.0) / 512.0) * 1600 * (math.pi / 180) msg.angular_velocity.z = ( (gyro_vels[2] - 512.0) / 512.0) * 1600 * (math.pi / 180) self.pub_imu.publish(msg) def publish_camera(self): img_msg = Image() img_msg.header.stamp = rospy.get_rostime() img_msg.height = self.camera.getHeight() img_msg.width = self.camera.getWidth() img_msg.encoding = "bgra8" img_msg.step = 4 * self.camera.getWidth() img = self.camera.getImage() img_msg.data = img self.pub_cam.publish(img_msg) def set_gravity(self, active): if active: self.world_info.getField("gravity").setSFVec3f([0.0, -9.81, 0.0]) else: self.world_info.getField("gravity").setSFVec3f([0.0, 0.0, 0.0]) def reset_robot_pose(self, pos, quat): rpy = tf.transformations.euler_from_quaternion(quat) self.set_robot_pose_rpy(pos, rpy) self.robot_node.resetPhysics() def reset_robot_pose_rpy(self, pos, rpy): self.set_robot_pose_rpy(pos, rpy) self.robot_node.resetPhysics() def reset(self): self.supervisor.simulationReset() def node(self): s = self.supervisor.getSelected() if s is not None: print(f"id: {s.getId()}, type: {s.getType()}, def: {s.getDef()}") def set_robot_pose_rpy(self, pos, rpy): self.translation_field.setSFVec3f(pos_ros_to_webots(pos)) self.rotation_field.setSFRotation(rpy_to_axis(*rpy)) def get_robot_pose_rpy(self): pos = self.translation_field.getSFVec3f() rot = self.rotation_field.getSFRotation() return pos_webots_to_ros(pos), axis_to_rpy(*rot)
class ArmEnv(object): def __init__(self, step_max=100, add_noise=False): self.observation_dim = 12 self.action_dim = 6 """ state """ self.state = np.zeros(self.observation_dim) self.init_state = np.zeros(self.observation_dim) self.uncode_init_state = np.zeros(self.observation_dim) """ action """ self.action_high_bound = 1 self.action = np.zeros(self.action_dim) """ reward """ self.step_max = step_max self.insert_depth = 40 """setting""" self.add_noise = add_noise # or True """information for action and state""" self.action_space = spaces.Box(low=-0.4, high=0.4, shape=(self.action_dim, ), dtype=np.float32) self.observation_space = spaces.Box(low=-10, high=10, shape=(self.observation_dim, ), dtype=np.float32) """Enable PD controler""" self.pd = True """Setup controllable variables""" self.movementMode = 1 # work under FK(0) or IK(1) """Timer""" self.timer = 0 """Initialize the Webots Supervisor""" self.supervisor = Supervisor() self.timeStep = int(8) # TODO: It's there a way to start simulation automatically? '''enable world devices''' # Initialize the arm motors. self.motors = [] for motorName in [ 'A motor', 'B motor', 'C motor', 'D motor', 'E motor', 'F motor' ]: motor = self.supervisor.getMotor(motorName) self.motors.append(motor) # Get the arm, target and hole nodes. self.target = self.supervisor.getFromDef('TARGET') self.arm = self.supervisor.getFromDef('ARM') self.hole = self.supervisor.getFromDef('HOLE') self.armEnd = self.supervisor.getFromDef('INIT') # Get the absolute position of the arm base and target. self.armPosition = self.arm.getPosition() self.targetPosition = self.target.getPosition() self.initPosition = self.armEnd.getPosition() # Get the translation field fo the hole self.hole_translation = self.hole.getField('translation') self.hole_rotation = self.hole.getField('rotation') # Get initial position of hole self.hole_new_position = [] self.hole_new_rotation = [] self.hole_init_position = self.hole_translation.getSFVec3f() self.hole_init_rotation = self.hole_rotation.getSFRotation() print("Hole init position", self.hole_init_position) print("Hole init rotation", self.hole_init_rotation) # get and enable sensors # Fxyz: N, Txyz: N*m self.fz_sensor = self.supervisor.getMotor('FZ_SENSOR') self.fz_sensor.enableForceFeedback(self.timeStep) self.fx_sensor = self.supervisor.getMotor('FX_SENSOR') self.fx_sensor.enableForceFeedback(self.timeStep) self.fy_sensor = self.supervisor.getMotor('FY_SENSOR') self.fy_sensor.enableForceFeedback(self.timeStep) self.tx_sensor = self.supervisor.getMotor('TX_SENSOR') self.tx_sensor.enableTorqueFeedback(self.timeStep) self.ty_sensor = self.supervisor.getMotor('TY_SENSOR') self.ty_sensor.enableTorqueFeedback(self.timeStep) self.tz_sensor = self.supervisor.getMotor('TZ_SENSOR') self.tz_sensor.enableTorqueFeedback(self.timeStep) self.FZ = self.fz_sensor.getForceFeedback() self.FX = self.fx_sensor.getForceFeedback() self.FY = self.fy_sensor.getForceFeedback() self.TX = self.tx_sensor.getTorqueFeedback() self.TY = self.ty_sensor.getTorqueFeedback() self.TZ = self.tz_sensor.getTorqueFeedback() # # Used to plot F/T_t # self.plt_FX = [] # self.plt_FY = [] # self.plt_FZ = [] # self.plt_TX = [] # self.plt_TY = [] # self.plt_TZ = [] # self.plt_time = [] # self.plt_current_time = 0 """Initial Position of the robot""" # x/y/z in meters relative to world frame self.x = 0 self.y = 0 self.z = 0 # alpha/beta/gamma in rad relative to initial orientation self.alpha = 0 self.beta = 0 self.gamma = 0 """reset world""" self.reset() def step(self, action): self.supervisor.step(self.timeStep) self.timer += 1 # read state uncode_state, self.state = self.__get_state() # # record graph # self.plt_current_time += self.timeStep * 0.001 # self.plt_time.append(self.plt_current_time) # self.plt_FX.append(self.state[6]) # self.plt_FY.append(self.state[7]) # self.plt_FZ.append(self.state[8]) # self.plt_TX.append(self.state[9]) # self.plt_TY.append(self.state[10]) # self.plt_TZ.append(self.state[11]) # adjust action to usable motion action = cal.actions(self.state, action, self.pd) # print('action', action) # take actions self.__execute_action(action) # uncode_state, self.state = self.__get_state() # safety check safe = cal.safetycheck(self.state) # done and reward r, done = cal.reward_step(self.state, safe, self.timer) return self.state, uncode_state, r, done, safe def directstep(self, action): self.supervisor.step(self.timeStep) self.timer += 1 uncode_state, self.state = self.__get_state() # # record graph # self.plt_current_time += self.timeStep * 0.001 # self.plt_time.append(self.plt_current_time) # self.plt_FX.append(self.state[6]) # self.plt_FY.append(self.state[7]) # self.plt_FZ.append(self.state[8]) # self.plt_TX.append(self.state[9]) # self.plt_TY.append(self.state[10]) # self.plt_TZ.append(self.state[11]) # print('action', action) # take actions self.__execute_action(action) # uncode_state, self.state = self.__get_state() # safety check safe = cal.safetycheck(self.state) # done and reward r, done = cal.reward_step(self.state, safe, self.timer) return self.state, r, done, safe def restart(self): """restart world""" cal.clear() #print("*******************************world restart*******************************") '''set random position for hole''' hole_new_position = self.hole_new_position hole_new_rotation = self.hole_new_rotation self.hole_translation.setSFVec3f( [hole_new_position[0], 2, hole_new_position[2]]) self.hole_rotation.setSFRotation([ hole_new_rotation[0], hole_new_rotation[1], hole_new_rotation[2], hole_new_rotation[3] ]) '''reset signals''' self.timer = 0 "Initial Position of the robot" # x/y/z in meters relative to world frame self.x = self.initPosition[0] - self.armPosition[0] self.y = -(self.initPosition[2] - self.armPosition[2]) self.z = self.initPosition[1] - self.armPosition[1] # alpha/beta/gamma in rad relative to initial orientation self.alpha = 0.0 self.beta = 0.0 self.gamma = 0.0 # Call "ikpy" to compute the inverse kinematics of the arm. # ikpy only compute position ikResults = armChain.inverse_kinematics([[1, 0, 0, self.x], [0, 1, 0, self.y], [0, 0, 1, self.z], [0, 0, 0, 1]]) # Actuate the 3 first arm motors with the IK results. for i in range(3): self.motors[i].setPosition(ikResults[i + 1]) self.motors[3].setPosition(self.alpha) self.motors[4].setPosition(-ikResults[2] - ikResults[3] + self.beta) self.motors[5].setPosition(self.gamma) for i in range(6): self.motors[i].setVelocity(1.0) """wait for robot to move to initial place""" for i in range(20): self.supervisor.step(self.timeStep) for i in range(6): self.motors[i].setVelocity(0.07) '''state''' # get self.uncode_init_state, self.init_state, = self.__get_state() # '''reset graph''' # # Used to plot F/T_t # self.plt_FX.clear() # self.plt_FY.clear() # self.plt_FZ.clear() # self.plt_TX.clear() # self.plt_TY.clear() # self.plt_TZ.clear() # self.plt_time.clear() # self.plt_current_time = 0 # print('initial state:') # print("State 0-3", self.init_state[0:3]) # print("State 3-6", self.init_state[3:6]) # print("State 6-9", self.init_state[6:9]) # print("State 9-12", self.init_state[9:12]) done = False # reset simulation self.supervisor.simulationResetPhysics() return self.init_state, self.uncode_init_state, done def reset(self): """restart world""" cal.clear() #print("*******************************world rested*******************************") '''set random position for hole''' self.hole_new_position = self.hole_init_position + (np.random.rand(3) - 0.5) / 500 self.hole_new_rotation = self.hole_init_rotation + (np.random.rand(4) - 0.5) / 80 self.hole_translation.setSFVec3f( [self.hole_new_position[0], 2, self.hole_new_position[2]]) self.hole_rotation.setSFRotation([ self.hole_new_rotation[0], self.hole_new_rotation[1], self.hole_new_rotation[2], self.hole_new_rotation[3] ]) '''reset signals''' self.timer = 0 "Initial Position of the robot" # x/y/z in meters relative to world frame self.x = self.initPosition[0] - self.armPosition[0] self.y = -(self.initPosition[2] - self.armPosition[2]) self.z = self.initPosition[1] - self.armPosition[1] # alpha/beta/gamma in rad relative to initial orientation self.alpha = 0.0 self.beta = 0.0 self.gamma = 0.0 # Call "ikpy" to compute the inverse kinematics of the arm. # ikpy only compute position ikResults = armChain.inverse_kinematics([[1, 0, 0, self.x], [0, 1, 0, self.y], [0, 0, 1, self.z], [0, 0, 0, 1]]) # Actuate the 3 first arm motors with the IK results. for i in range(3): self.motors[i].setPosition(ikResults[i + 1]) self.motors[3].setPosition(self.alpha) self.motors[4].setPosition(-ikResults[2] - ikResults[3] + self.beta) self.motors[5].setPosition(self.gamma) for i in range(6): self.motors[i].setVelocity(1.0) """wait for robot to move to initial place""" for i in range(20): self.supervisor.step(self.timeStep) for i in range(6): self.motors[i].setVelocity(0.07) '''state''' # get self.uncode_init_state, self.init_state, = self.__get_state() # '''reset graph''' # # Used to plot F/T_t # self.plt_FX.clear() # self.plt_FY.clear() # self.plt_FZ.clear() # self.plt_TX.clear() # self.plt_TY.clear() # self.plt_TZ.clear() # self.plt_time.clear() # self.plt_current_time = 0 # print('initial state:') # print("State 0-3", self.init_state[0:3]) # print("State 3-6", self.init_state[3:6]) # print("State 6-9", self.init_state[6:9]) # print("State 9-12", self.init_state[9:12]) done = False # reset simulation self.supervisor.simulationResetPhysics() return self.init_state, self.uncode_init_state, done def __get_state(self): self.FZ = self.fz_sensor.getForceFeedback() self.FX = self.fx_sensor.getForceFeedback() self.FY = self.fy_sensor.getForceFeedback() self.TX = self.tx_sensor.getTorqueFeedback() self.TY = self.ty_sensor.getTorqueFeedback() self.TZ = -self.tz_sensor.getTorqueFeedback() currentPosition = [] currentPosition.append(self.targetPosition[0] - (self.x + self.armPosition[0])) currentPosition.append(self.targetPosition[2] - (self.armPosition[2] - self.y)) currentPosition.append(self.targetPosition[1] - (self.z + self.armPosition[1] - 0.14)) # state state = np.concatenate( (currentPosition, [self.alpha, self.beta, self.gamma], [self.FX, self.FY, self.FZ], [self.TX, self.TY, self.TZ])) code_state = cal.code_state(state) return state, code_state def __execute_action(self, action): """ execute action """ # do action self.x += action[0] self.y += action[1] self.z -= action[2] self.alpha += action[3] self.beta += action[4] self.gamma -= action[5] # bound position self.x = np.clip(self.x, self.initPosition[0] - self.armPosition[0] - 0.02, self.initPosition[0] - self.armPosition[0] + 0.02) self.y = np.clip(self.y, self.armPosition[2] - self.initPosition[2] - 0.02, self.armPosition[2] - self.initPosition[2] + 0.02) self.z = np.clip(self.z, self.initPosition[1] - self.armPosition[1] - 0.06, self.initPosition[1] - self.armPosition[1] + 0.04) self.alpha = np.clip(self.alpha, -1, 1) self.beta = np.clip(self.beta, -1, 1) self.gamma = np.clip(self.gamma, -1, 1) # Call "ikpy" to compute the inverse kinematics of the arm. # ikpy only compute position ikResults = armChain.inverse_kinematics([[1, 0, 0, self.x], [0, 1, 0, self.y], [0, 0, 1, self.z], [0, 0, 0, 1]]) # Actuate the 3 first arm motors with the IK results. for i in range(3): self.motors[i].setPosition(ikResults[i + 1]) self.motors[3].setPosition(self.alpha) self.motors[4].setPosition(-ikResults[2] - ikResults[3] + self.beta) self.motors[5].setPosition(self.gamma) def test_action(self, action): self.supervisor.step(self.timeStep) """ execute action """ # do action self.x += action[0] self.y += action[1] self.z -= action[2] self.alpha += action[3] self.beta += action[4] self.gamma -= action[5] # bound position self.x = np.clip(self.x, 0.94455 - self.armPosition[0] - 0.02, 0.94455 - self.armPosition[0] + 0.02) self.y = np.clip(self.y, self.armPosition[2] - 0.02, self.armPosition[2] + 0.02) self.z = np.clip(self.z, 2.255 - self.armPosition[1] - 0.06, 2.255 - self.armPosition[1] + 0.04) self.alpha = np.clip(self.alpha, -1, 1) self.beta = np.clip(self.beta, -1, 1) self.gamma = np.clip(self.gamma, -1, 1) # Call "ikpy" to compute the inverse kinematics of the arm. # ikpy only compute position ikResults = armChain.inverse_kinematics([[1, 0, 0, self.x], [0, 1, 0, self.y], [0, 0, 1, self.z], [0, 0, 0, 1]]) # ikResults = armChain.inverse_kinematics(self.EulerToMatrix(self.end_effector[:3], self.end_effector[3:6])) # print('ikresults', ikResults) # Actuate the 3 first arm motors with the IK results. # for i in range(6): # self.motors[i].setPosition(ikResults[i + 1]) # Actuate the 3 first arm motors with the IK results. for i in range(3): self.motors[i].setPosition(ikResults[i + 1]) self.motors[3].setPosition(self.alpha) self.motors[4].setPosition(-ikResults[2] - ikResults[3] + self.beta) self.motors[5].setPosition(self.gamma) _, self.state = self.__get_state() return self.state @staticmethod def sample_action(): return (np.random.rand(6) - 0.5) / 10
class Atlas(gym.Env): """ Y axis is the vertical axis. Base class for Webots actors in a Scene. These environments create single-player scenes and behave like normal Gym environments, if you don't use multiplayer. """ electricity_cost = -2.0 # cost for using motors -- this parameter should be carefully tuned against reward for making progress, other values less improtant stall_torque_cost = -0.1 # cost for running electric current through a motor even at zero rotational speed, small foot_collision_cost = -1.0 # touches another leg, or other objects, that cost makes robot avoid smashing feet into itself joints_at_limit_cost = -0.2 # discourage stuck joints episode_reward = 0 frame = 0 _max_episode_steps = 1000 initial_y = None body_xyz = None joint_angles = None joint_exceed_limit = False ignore_frame = 1 def __init__(self, action_dim, obs_dim): self.robot = Supervisor() solid_def_names = self.read_all_def() self.def_node_field_list = self.get_all_fields(solid_def_names) self.robot_node = self.robot.getFromDef('Atlas') self.boom_base = self.robot.getFromDef('BoomBase') self.boom_base_trans_field = self.boom_base.getField("translation") self.timeStep = int(self.robot.getBasicTimeStep() * self.ignore_frame) # ms self.find_and_enable_devices() high = np.ones([action_dim]) self.action_space = gym.spaces.Box(-high, high, dtype=np.float32) high = np.inf*np.ones([obs_dim]) self.observation_space = gym.spaces.Box(-high, high, dtype=np.float32) def read_all_def(self, file_name ='../../worlds/atlas_change_foot.wbt'): no_solid_str_list = ['HingeJoint', 'BallJoint', 'Hinge2Joint', 'Shape', 'Group', 'Physics'] with open(file_name) as f: content = f.readlines() # you may also want to remove whitespace characters like `\n` at the end of each line content = [x.strip() for x in content] def_str_list = [] for x in content: if 'DEF' in x: def_str_list.append(x) for sub_str in no_solid_str_list: for i in range(len(def_str_list)): if sub_str in def_str_list[i]: def_str_list[i] = 'remove_def' solid_def_names = [] for def_str in def_str_list: if 'remove_def' != def_str: def_str_temp_list = def_str.split() solid_def_names.append(def_str_temp_list[def_str_temp_list.index('DEF') + 1]) print(solid_def_names) print('There are duplicates: ',len(solid_def_names) != len(set(solid_def_names))) return solid_def_names def get_all_fields(self, solid_def_names): def_node_field_list = [] for def_name in solid_def_names: def_node = self.robot.getFromDef(def_name) node_trans_field = def_node.getField("translation") node_rot_field = def_node.getField("rotation") # print(def_name) node_ini_trans = node_trans_field.getSFVec3f() node_ini_rot = node_rot_field.getSFRotation() def_node_field_list.append({'def_node': def_node, 'node_trans_field': node_trans_field, 'node_rot_field': node_rot_field, 'node_ini_trans': node_ini_trans, 'node_ini_rot': node_ini_rot}) return def_node_field_list def reset_all_fields(self): for def_node_field in self.def_node_field_list: def_node_field['node_trans_field'].setSFVec3f(def_node_field['node_ini_trans']) def_node_field['node_rot_field'].setSFRotation(def_node_field['node_ini_rot']) def find_and_enable_devices(self): # inertial unit self.inertial_unit = self.robot.getInertialUnit("inertial unit") self.inertial_unit.enable(self.timeStep) # gps self.gps = self.robot.getGPS("gps") self.gps.enable(self.timeStep) # foot sensors self.fsr = [self.robot.getTouchSensor("RFsr"), self.robot.getTouchSensor("LFsr")] for i in range(len(self.fsr)): self.fsr[i].enable(self.timeStep) # all motors # motor_names = [# 'HeadPitch', 'HeadYaw', # 'LLegUay', 'LLegLax', 'LLegKny', # 'LLegLhy', 'LLegMhx', 'LLegUhz', # 'RLegUay', 'RLegLax', 'RLegKny', # 'RLegLhy', 'RLegMhx', 'RLegUhz', # ] motor_names = [ # 'HeadPitch', 'HeadYaw', 'BackLbz', 'BackMby', 'BackUbx', 'NeckAy', 'LLegLax', 'LLegMhx', 'LLegUhz', 'RLegLax', 'RLegMhx', 'RLegUhz', ] self.motors = [] for i in range(len(motor_names)): self.motors.append(self.robot.getMotor(motor_names[i])) # leg pitch motors self.legPitchMotor = [self.robot.getMotor('RLegLhy'), self.robot.getMotor('RLegKny'), self.robot.getMotor('RLegUay'), self.robot.getMotor('LLegLhy'), self.robot.getMotor('LLegKny'), self.robot.getMotor('LLegUay')] for i in range(len(self.legPitchMotor)): self.legPitchMotor[i].enableTorqueFeedback(self.timeStep) # leg pitch sensors self.legPitchSensor =[self.robot.getPositionSensor('RLegLhyS'), self.robot.getPositionSensor('RLegKnyS'), self.robot.getPositionSensor('RLegUayS'), self.robot.getPositionSensor('LLegLhyS'), self.robot.getPositionSensor('LLegKnyS'), self.robot.getPositionSensor('LLegUayS')] for i in range(len(self.legPitchSensor)): self.legPitchSensor[i].enable(self.timeStep) def apply_action(self, a): assert (np.isfinite(a).all()) for n, j in enumerate(self.legPitchMotor): max_joint_angle = j.getMaxPosition() min_joint_angle = j.getMinPosition() mean_angle = 0.5 * (max_joint_angle + min_joint_angle) half_range_angle = 0.5 * (max_joint_angle - min_joint_angle) j.setPosition(mean_angle + half_range_angle * float(np.clip(a[n], -1, +1))) # joint_angle = self.read_joint_angle(joint_idx=n) # torque = 0.5 * j.getMaxTorque() * float(np.clip(a[n], -1, +1)) # if joint_angle > max_joint_angle: # j.setPosition(max_joint_angle - 0.1) # # j.setTorque(-1.0 * abs(torque)) # elif joint_angle < min_joint_angle: # j.setPosition(min_joint_angle + 0.1) # # j.setTorque(abs(torque)) # else: # j.setTorque(torque) def read_joint_angle(self, joint_idx): joint_angle = self.legPitchSensor[joint_idx].getValue() % (2.0 * np.pi) if joint_angle > np.pi: joint_angle -= 2.0 * np.pi max_joint_angle = self.legPitchMotor[joint_idx].getMaxPosition() min_joint_angle = self.legPitchMotor[joint_idx].getMinPosition() if joint_angle > max_joint_angle + 0.05 or joint_angle < min_joint_angle - 0.05: self.joint_exceed_limit = True return joint_angle def calc_state(self): joint_states = np.zeros(2*len(self.legPitchMotor)) # even elements [0::2] position, scaled to -1..+1 between limits for r in range(6): joint_angle = self.read_joint_angle(joint_idx=r) if r in [0, 3]: joint_states[2 * r] = (-joint_angle - np.deg2rad(35)) / np.deg2rad(80) elif r in [1, 4]: joint_states[2 * r] = 1 - joint_angle / np.deg2rad(75) elif r in [2, 5]: joint_states[2 * r] = -joint_angle / np.deg2rad(45) # odd elements [1::2] angular speed, scaled to show -1..+1 for r in range(6): if self.joint_angles is None: joint_states[2 * r + 1] = 0.0 else: joint_states[2 * r + 1] = 0.5 * (joint_states[2*r] - self.joint_angles[r]) self.joint_angles = np.copy(joint_states[0::2]) self.joint_speeds = joint_states[1::2] self.joints_at_limit = np.count_nonzero(np.abs(joint_states[0::2]) > 0.99) self.joint_torques = np.zeros(len(self.legPitchMotor)) for i in range(len(self.legPitchMotor)): self.joint_torques[i] = self.legPitchMotor[i].getTorqueFeedback() \ / self.legPitchMotor[i].getAvailableTorque() if self.body_xyz is None: self.body_xyz = np.asarray(self.gps.getValues()) self.body_speed = np.zeros(3) else: self.body_speed = (np.asarray(self.gps.getValues()) - self.body_xyz) / (self.timeStep * 1e-3) self.body_xyz = np.asarray(self.gps.getValues()) body_local_speed = np.copy(self.body_speed) body_local_speed[0], body_local_speed[2] = self.calc_local_speed() # print('speed: ', np.linalg.norm(self.body_speed)) y = self.body_xyz[1] if self.initial_y is None: self.initial_y = y self.body_rpy = self.inertial_unit.getRollPitchYaw() ''' The roll angle indicates the unit's rotation angle about its x-axis, in the interval [-π,π]. The roll angle is zero when the InertialUnit is horizontal, i.e., when its y-axis has the opposite direction of the gravity (WorldInfo defines the gravity vector). The pitch angle indicates the unit's rotation angle about is z-axis, in the interval [-π/2,π/2]. The pitch angle is zero when the InertialUnit is horizontal, i.e., when its y-axis has the opposite direction of the gravity. If the InertialUnit is placed on the Robot with a standard orientation, then the pitch angle is negative when the Robot is going down, and positive when the robot is going up. The yaw angle indicates the unit orientation, in the interval [-π,π], with respect to WorldInfo.northDirection. The yaw angle is zero when the InertialUnit's x-axis is aligned with the north direction, it is π/2 when the unit is heading east, and -π/2 when the unit is oriented towards the west. The yaw angle can be used as a compass. ''' more = np.array([ y - self.initial_y, 0, 0, 0.3 * body_local_speed[0], 0.3 * body_local_speed[1], 0.3 * body_local_speed[2], # 0.3 is just scaling typical speed into -1..+1, no physical sense here self.body_rpy[0] / np.pi, self.body_rpy[1] / np.pi], dtype=np.float32) self.feet_contact = np.zeros(2) for j in range(len(self.fsr)): self.feet_contact[j] = self.fsr[j].getValue() return np.clip(np.concatenate([more] + [joint_states] + [self.feet_contact]), -5, +5) def calc_local_speed(self): ''' # progress in potential field is speed*dt, typical speed is about 2-3 meter per second, this potential will change 2-3 per frame (not per second), # all rewards have rew/frame units and close to 1.0 ''' direction_r = self.body_xyz - np.asarray(self.boom_base_trans_field.getSFVec3f()) # print('robot_xyz: {}, boom_base_xyz: {}'.format(self.body_xyz, # np.asarray(self.boom_base_trans_field.getSFVec3f()))) direction_r = direction_r[[0, 2]] / np.linalg.norm(direction_r[[0, 2]]) direction_t = np.dot(np.asarray([[0, 1], [-1, 0]]), direction_r.reshape((-1, 1))) return np.dot(self.body_speed[[0, 2]], direction_t), np.dot(self.body_speed[[0, 2]], direction_r) def alive_bonus(self, y, pitch): return +1 if abs(y) > 0.5 and abs(pitch) < 1.0 else -1 def render(self, mode='human'): file_name = 'img.jpg' self.robot.exportImage(file=file_name, quality=100) return cv2.imread(file_name, -1) def step(self, action): for i in range(self.ignore_frame): self.apply_action(action) simulation_state = self.robot.step(self.timeStep) state = self.calc_state() # also calculates self.joints_at_limit # state[0] is body height above ground, body_rpy[1] is pitch alive = float(self.alive_bonus(state[0] + self.initial_y, self.body_rpy[1])) progress, _ = self.calc_local_speed() # print('progress: {}'.format(progress)) feet_collision_cost = 0.0 ''' let's assume we have DC motor with controller, and reverse current braking ''' electricity_cost = self.electricity_cost * float(np.abs( self.joint_torques * self.joint_speeds).mean()) electricity_cost += self.stall_torque_cost * float(np.square(self.joint_torques).mean()) joints_at_limit_cost = float(self.joints_at_limit_cost * self.joints_at_limit) rewards = [ alive, progress, electricity_cost, joints_at_limit_cost, feet_collision_cost ] self.episode_reward += progress self.frame += 1 done = (-1 == simulation_state) or (self._max_episode_steps <= self.frame) \ or (alive < 0) or (not np.isfinite(state).all()) # print('frame: {}, alive: {}, done: {}, body_xyz: {}'.format(self.frame, alive, done, self.body_xyz)) # print('state_{} \n action_{}, reward_{}'.format(state, action, sum(rewards))) return state, sum(rewards), done, {} def run(self): # Main loop. for i in range(self._max_episode_steps): action = np.random.uniform(-1, 1, 6) state, reward, done, _ = self.step(action) # print('state_{} \n action_{}, reward_{}'.format(state, action, reward)) if done: break def reset(self, is_eval_only = False): self.initial_y = None self.body_xyz = None self.joint_angles = None self.frame = 0 self.episode_reward = 0 self.joint_exceed_limit = False for i in range(100): for j in self.motors: j.setPosition(0) for k in range(len(self.legPitchMotor)): j = self.legPitchMotor[k] j.setPosition(0) self.robot.step(self.timeStep) self.robot.simulationResetPhysics() self.reset_all_fields() for i in range(10): self.robot_node.moveViewpoint() self.robot.step(self.timeStep) if is_eval_only: time.sleep(1) return self.calc_state()
#position Sensor positionSensorList = [] for i in range(7): psName = 'positionSensor' + str(i + 1) ps = PositionSensor(psName) ps.enable(1) positionSensorList.append(ps) #----------- motorList = [] #連接馬達 for i in range(7): motorName = 'motor' + str(i + 1) motor = Motor(motorName) motor.setVelocity(1.0) motorList.append(motor) _motor = supervisor.getMotor('finger motor L') _motor = Motor('finger motor L') _motor.setVelocity(0.1) motorList.append(_motor) _motor = supervisor.getMotor('finger motor R') _motor.setVelocity(0.1) motorList.append(_motor) controller = PandaController.PandaController(motorList, positionSensorList) ''' ik code end---------------------------------------------------------------------------------------------------------- ''' ''' initial_obs, initial_state = initial_step() write_to_pipe([obs_pipe, touch_pipe], [initial_obs, initial_state]) print(np.array(initial_obs).shape, initial_state)
class UR5E(): def __init__(self): self.sup = Supervisor() self.timeStep = int(4 * self.sup.getBasicTimeStep()) # Create the arm chain from the URDF self.armChain = Chain.from_urdf_file('ur5e.urdf') self.arm_motors = self.getARMJoint() self.grip_motors = self.getGripperJoint() '''def __init__(self): self.robot = Supervisor() self.arm = self.robot.getSelf() self.timeStep = int(4 * self.robot.getBasicTimeStep()) filename = None with tempfile.NamedTemporaryFile(suffix='.urdf', delete=False) as file: filename = file.name file.write(self.robot.getUrdf().encode('utf-8')) self.armChain = Chain.from_urdf_file(filename) print(self.armChain) # Initialize the arm motors. self.arm_motors = [] for link in self.armChain.links: if 'sensor' in link.name: motor = self.robot.getMotor(link.name.replace('sensor', 'motor')) motor.setVelocity(1.0) self.arm_motors.append(motor)''' def getARMJoint(self): base_joint = self.sup.getMotor("shoulder_pan_joint") base_joint.setVelocity(0.5) joint1 = self.sup.getMotor("shoulder_lift_joint") joint1.setVelocity(0.5) joint2 = self.sup.getMotor("elbow_joint") joint2.setVelocity(0.5) joint3 = self.sup.getMotor("wrist_1_joint") joint3.setVelocity(0.5) joint4 = self.sup.getMotor("wrist_2_joint") joint4.setVelocity(0.5) joint5 = self.sup.getMotor("wrist_3_joint") joint5.setVelocity(0.5) # send joint command ''' base_joint.setPosition(0.2) joint1.setPosition(-1.8) joint2.setPosition(-1.6) joint3.setPosition(-1.32) joint4.setPosition(1.57) joint5.setPosition(0.2) ''' return [base_joint, joint1, joint2, joint3, joint4, joint5] # set base position def getGripperJoint(self): finger_1_joint_1 = self.sup.getMotor('finger_1_joint_1') finger_2_joint_1 = self.sup.getMotor('finger_1_joint_2') finger_middle_joint_1 = self.sup.getMotor('finger_1_joint_3') return [finger_1_joint_1, finger_2_joint_1, finger_middle_joint_1] def simpleDemo(self, target="target2"): target = self.sup.getFromDef(target) arm = self.sup.getSelf() for motor in self.grip_motors: print(motor) motor.setPosition(1) #self.grip_motors[0].setPosition(1) ''' self.arm_motors[0].setPosition(0.2) self.arm_motors[1].setPosition(-1.8) self.arm_motors[2].setPosition(-1.6) self.arm_motors[3].setPosition(-1.32) self.arm_motors[4].setPosition(1.57) self.arm_motors[5].setPosition(0.2) ''' '''