Exemplo n.º 1
0
class Atlas(gym.Env):
    """
        Y axis is the vertical axis.
        Base class for Webots actors in a Scene.
        These environments create single-player scenes and behave like normal Gym environments, if
        you don't use multiplayer.
    """

    electricity_cost = -2.0  # cost for using motors -- this parameter should be carefully tuned against reward for making progress, other values less improtant
    stall_torque_cost = -0.1  # cost for running electric current through a motor even at zero rotational speed, small
    foot_collision_cost = -1.0  # touches another leg, or other objects, that cost makes robot avoid smashing feet into itself
    joints_at_limit_cost = -0.2  # discourage stuck joints

    episode_reward = 0

    frame = 0
    _max_episode_steps = 1000

    initial_y = None
    body_xyz = None
    joint_angles = None
    joint_exceed_limit = False
    ignore_frame = 1


    def __init__(self, action_dim, obs_dim):

        self.robot = Supervisor()
        solid_def_names = self.read_all_def()
        self.def_node_field_list = self.get_all_fields(solid_def_names)

        self.robot_node = self.robot.getFromDef('Atlas')

        self.boom_base = self.robot.getFromDef('BoomBase')
        self.boom_base_trans_field = self.boom_base.getField("translation")
        self.timeStep = int(self.robot.getBasicTimeStep() * self.ignore_frame) # ms
        self.find_and_enable_devices()

        high = np.ones([action_dim])
        self.action_space = gym.spaces.Box(-high, high, dtype=np.float32)
        high = np.inf*np.ones([obs_dim])
        self.observation_space = gym.spaces.Box(-high, high, dtype=np.float32)

    def read_all_def(self, file_name ='../../worlds/atlas_change_foot.wbt'):
        no_solid_str_list = ['HingeJoint', 'BallJoint', 'Hinge2Joint', 'Shape', 'Group', 'Physics']
        with open(file_name) as f:
            content = f.readlines()
        # you may also want to remove whitespace characters like `\n` at the end of each line
        content = [x.strip() for x in content]
        def_str_list = []
        for x in content:
            if 'DEF' in x:
                def_str_list.append(x)
        for sub_str in no_solid_str_list:
            for i in range(len(def_str_list)):
                if sub_str in def_str_list[i]:
                    def_str_list[i] = 'remove_def'
        solid_def_names = []
        for def_str in def_str_list:
            if 'remove_def' != def_str:
                def_str_temp_list = def_str.split()
                solid_def_names.append(def_str_temp_list[def_str_temp_list.index('DEF') + 1])
        print(solid_def_names)
        print('There are duplicates: ',len(solid_def_names) != len(set(solid_def_names)))
        return solid_def_names

    def get_all_fields(self, solid_def_names):
        def_node_field_list = []
        for def_name in solid_def_names:
            def_node = self.robot.getFromDef(def_name)
            node_trans_field = def_node.getField("translation")
            node_rot_field = def_node.getField("rotation")
            # print(def_name)
            node_ini_trans = node_trans_field.getSFVec3f()
            node_ini_rot = node_rot_field.getSFRotation()

            def_node_field_list.append({'def_node': def_node,
                                        'node_trans_field': node_trans_field,
                                        'node_rot_field': node_rot_field,
                                        'node_ini_trans': node_ini_trans,
                                        'node_ini_rot': node_ini_rot})
        return def_node_field_list

    def reset_all_fields(self):
        for def_node_field in self.def_node_field_list:
            def_node_field['node_trans_field'].setSFVec3f(def_node_field['node_ini_trans'])
            def_node_field['node_rot_field'].setSFRotation(def_node_field['node_ini_rot'])

    def find_and_enable_devices(self):
        # inertial unit
        self.inertial_unit = self.robot.getInertialUnit("inertial unit")
        self.inertial_unit.enable(self.timeStep)

        # gps
        self.gps = self.robot.getGPS("gps")
        self.gps.enable(self.timeStep)

        # foot sensors

        self.fsr = [self.robot.getTouchSensor("RFsr"), self.robot.getTouchSensor("LFsr")]
        for i in range(len(self.fsr)):
            self.fsr[i].enable(self.timeStep)

        # all motors
        # motor_names = [# 'HeadPitch', 'HeadYaw',
        #                'LLegUay', 'LLegLax', 'LLegKny',
        #                'LLegLhy', 'LLegMhx', 'LLegUhz',
        #                'RLegUay', 'RLegLax', 'RLegKny',
        #                'RLegLhy', 'RLegMhx', 'RLegUhz',
        #                ]
        motor_names = [  # 'HeadPitch', 'HeadYaw',
             'BackLbz', 'BackMby', 'BackUbx', 'NeckAy',
             'LLegLax', 'LLegMhx', 'LLegUhz',
             'RLegLax', 'RLegMhx', 'RLegUhz',
        ]
        self.motors = []
        for i in range(len(motor_names)):
            self.motors.append(self.robot.getMotor(motor_names[i]))

        # leg pitch motors
        self.legPitchMotor = [self.robot.getMotor('RLegLhy'),
                              self.robot.getMotor('RLegKny'),
                              self.robot.getMotor('RLegUay'),
                              self.robot.getMotor('LLegLhy'),
                              self.robot.getMotor('LLegKny'),
                              self.robot.getMotor('LLegUay')]

        for i in range(len(self.legPitchMotor)):
            self.legPitchMotor[i].enableTorqueFeedback(self.timeStep)

        # leg pitch sensors
        self.legPitchSensor =[self.robot.getPositionSensor('RLegLhyS'),
                              self.robot.getPositionSensor('RLegKnyS'),
                              self.robot.getPositionSensor('RLegUayS'),
                              self.robot.getPositionSensor('LLegLhyS'),
                              self.robot.getPositionSensor('LLegKnyS'),
                              self.robot.getPositionSensor('LLegUayS')]
        for i in range(len(self.legPitchSensor)):
            self.legPitchSensor[i].enable(self.timeStep)


    def apply_action(self, a):
        assert (np.isfinite(a).all())
        for n, j in enumerate(self.legPitchMotor):
            max_joint_angle = j.getMaxPosition()
            min_joint_angle = j.getMinPosition()
            mean_angle = 0.5 * (max_joint_angle + min_joint_angle)
            half_range_angle = 0.5 * (max_joint_angle - min_joint_angle)
            j.setPosition(mean_angle + half_range_angle * float(np.clip(a[n], -1, +1)))

            # joint_angle = self.read_joint_angle(joint_idx=n)
            # torque = 0.5 * j.getMaxTorque() * float(np.clip(a[n], -1, +1))
            # if joint_angle > max_joint_angle:
            #     j.setPosition(max_joint_angle - 0.1)
            #     # j.setTorque(-1.0 * abs(torque))
            # elif joint_angle < min_joint_angle:
            #     j.setPosition(min_joint_angle + 0.1)
            #     # j.setTorque(abs(torque))
            # else:
            #     j.setTorque(torque)


    def read_joint_angle(self, joint_idx):
        joint_angle = self.legPitchSensor[joint_idx].getValue() % (2.0 * np.pi)
        if joint_angle > np.pi:
            joint_angle -= 2.0 * np.pi
        max_joint_angle = self.legPitchMotor[joint_idx].getMaxPosition()
        min_joint_angle = self.legPitchMotor[joint_idx].getMinPosition()
        if joint_angle > max_joint_angle + 0.05 or joint_angle < min_joint_angle - 0.05:
            self.joint_exceed_limit = True
        return joint_angle

    def calc_state(self):
        joint_states = np.zeros(2*len(self.legPitchMotor))
        # even elements [0::2] position, scaled to -1..+1 between limits
        for r in range(6):
            joint_angle = self.read_joint_angle(joint_idx=r)
            if r in [0, 3]:
                joint_states[2 * r] = (-joint_angle - np.deg2rad(35)) / np.deg2rad(80)
            elif r in [1, 4]:
                joint_states[2 * r] = 1 - joint_angle / np.deg2rad(75)
            elif r in [2, 5]:
                joint_states[2 * r] = -joint_angle / np.deg2rad(45)
        # odd elements  [1::2] angular speed, scaled to show -1..+1
        for r in range(6):
            if self.joint_angles is None:
                joint_states[2 * r + 1] = 0.0
            else:
                joint_states[2 * r + 1] = 0.5 * (joint_states[2*r] - self.joint_angles[r])

        self.joint_angles = np.copy(joint_states[0::2])
        self.joint_speeds = joint_states[1::2]
        self.joints_at_limit = np.count_nonzero(np.abs(joint_states[0::2]) > 0.99)

        self.joint_torques = np.zeros(len(self.legPitchMotor))
        for i in range(len(self.legPitchMotor)):
            self.joint_torques[i] = self.legPitchMotor[i].getTorqueFeedback() \
                                    / self.legPitchMotor[i].getAvailableTorque()

        if self.body_xyz is None:
            self.body_xyz = np.asarray(self.gps.getValues())
            self.body_speed = np.zeros(3)
        else:
            self.body_speed = (np.asarray(self.gps.getValues()) - self.body_xyz) / (self.timeStep * 1e-3)
            self.body_xyz = np.asarray(self.gps.getValues())

        body_local_speed = np.copy(self.body_speed)
        body_local_speed[0], body_local_speed[2] = self.calc_local_speed()

        # print('speed: ', np.linalg.norm(self.body_speed))
        y = self.body_xyz[1]
        if self.initial_y is None:
            self.initial_y = y

        self.body_rpy = self.inertial_unit.getRollPitchYaw()
        '''
        The roll angle indicates the unit's rotation angle about its x-axis, 
        in the interval [-π,π]. The roll angle is zero when the InertialUnit is horizontal, 
        i.e., when its y-axis has the opposite direction of the gravity (WorldInfo defines 
        the gravity vector).

        The pitch angle indicates the unit's rotation angle about is z-axis, 
        in the interval [-π/2,π/2]. The pitch angle is zero when the InertialUnit is horizontal, 
        i.e., when its y-axis has the opposite direction of the gravity. 
        If the InertialUnit is placed on the Robot with a standard orientation, 
        then the pitch angle is negative when the Robot is going down, 
        and positive when the robot is going up.

        The yaw angle indicates the unit orientation, in the interval [-π,π], 
        with respect to WorldInfo.northDirection. 
        The yaw angle is zero when the InertialUnit's x-axis is aligned with the north direction, 
        it is π/2 when the unit is heading east, and -π/2 when the unit is oriented towards the west. 
        The yaw angle can be used as a compass.
        '''

        more = np.array([
            y - self.initial_y,
            0, 0,
            0.3 * body_local_speed[0], 0.3 * body_local_speed[1], 0.3 * body_local_speed[2],
            # 0.3 is just scaling typical speed into -1..+1, no physical sense here
            self.body_rpy[0] / np.pi, self.body_rpy[1] / np.pi], dtype=np.float32)

        self.feet_contact = np.zeros(2)
        for j in range(len(self.fsr)):
            self.feet_contact[j] = self.fsr[j].getValue()

        return np.clip(np.concatenate([more] + [joint_states] + [self.feet_contact]), -5, +5)

    def calc_local_speed(self):
        '''
        # progress in potential field is speed*dt, typical speed is about 2-3 meter per second,
        this potential will change 2-3 per frame (not per second),
        # all rewards have rew/frame units and close to 1.0
        '''
        direction_r = self.body_xyz - np.asarray(self.boom_base_trans_field.getSFVec3f())
        # print('robot_xyz: {}, boom_base_xyz: {}'.format(self.body_xyz,
        #                                                 np.asarray(self.boom_base_trans_field.getSFVec3f())))
        direction_r = direction_r[[0, 2]] / np.linalg.norm(direction_r[[0, 2]])
        direction_t = np.dot(np.asarray([[0, 1],
                                  [-1, 0]]), direction_r.reshape((-1, 1)))
        return np.dot(self.body_speed[[0, 2]], direction_t), np.dot(self.body_speed[[0, 2]], direction_r)

    def alive_bonus(self, y, pitch):
        return +1 if abs(y) > 0.5 and abs(pitch) < 1.0 else -1

    def render(self, mode='human'):
        file_name = 'img.jpg'
        self.robot.exportImage(file=file_name, quality=100)
        return cv2.imread(file_name, -1)

    def step(self, action):
        for i in range(self.ignore_frame):
            self.apply_action(action)
            simulation_state = self.robot.step(self.timeStep)
        state = self.calc_state()  # also calculates self.joints_at_limit
        # state[0] is body height above ground, body_rpy[1] is pitch
        alive = float(self.alive_bonus(state[0] + self.initial_y,
                                       self.body_rpy[1]))

        progress, _ = self.calc_local_speed()
        # print('progress: {}'.format(progress))

        feet_collision_cost = 0.0


        '''
        let's assume we have DC motor with controller, and reverse current braking
        '''
        electricity_cost = self.electricity_cost * float(np.abs(
            self.joint_torques * self.joint_speeds).mean())
        electricity_cost += self.stall_torque_cost * float(np.square(self.joint_torques).mean())

        joints_at_limit_cost = float(self.joints_at_limit_cost * self.joints_at_limit)

        rewards = [
            alive,
            progress,
            electricity_cost,
            joints_at_limit_cost,
            feet_collision_cost
        ]

        self.episode_reward += progress

        self.frame += 1

        done = (-1 == simulation_state) or (self._max_episode_steps <= self.frame) \
               or (alive < 0) or (not np.isfinite(state).all())
        # print('frame: {}, alive: {}, done: {}, body_xyz: {}'.format(self.frame, alive, done, self.body_xyz))
        # print('state_{} \n action_{}, reward_{}'.format(state, action, sum(rewards)))
        return state, sum(rewards), done, {}

    def run(self):
        # Main loop.
        for i in range(self._max_episode_steps):
            action = np.random.uniform(-1, 1, 6)
            state, reward, done, _ = self.step(action)
            # print('state_{} \n action_{}, reward_{}'.format(state, action, reward))
            if done:
                break

    def reset(self, is_eval_only = False):
        self.initial_y = None
        self.body_xyz = None
        self.joint_angles = None
        self.frame = 0
        self.episode_reward = 0
        self.joint_exceed_limit = False

        for i in range(100):
            for j in self.motors:
                j.setPosition(0)
            for k in range(len(self.legPitchMotor)):
                j = self.legPitchMotor[k]
                j.setPosition(0)
            self.robot.step(self.timeStep)
        self.robot.simulationResetPhysics()
        self.reset_all_fields()
        for i in range(10):
            self.robot_node.moveViewpoint()
            self.robot.step(self.timeStep)
        if is_eval_only:
            time.sleep(1)
        return self.calc_state()
Exemplo n.º 2
0
class WebotsKukaEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self):
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma

        self.desired_pos = np.array([-0.15, 0.185, 0])

        self.objects_initial_positions = {
            "ring": [0.53, 0.015, 0],
            "ball": [0.4, 0.025, 0],
            "box": [0.5, 0.02, 0]
        }

        self._supervisor = Supervisor()
        self._timestep = 32

        self._touch = 0
        self._num_rew = 0
        # List of objects to be taken into consideration
        self._objects_names = []
        self._objects = {}

        # Get Fingers
        self.finger_names = ["TouchSensor1", "TouchSensor2"]
        self.fingers = {}

        for fin in self.finger_names:
            self.fingers[fin] = self._supervisor.getFromDef(fin)

        # List of links name
        self._link_names = [
            "arm1", "arm2", "arm3", "arm4", "arm5", "finger1", "finger2"
        ]

        # Get motors
        self._link_objects = {}
        for link in self._link_names:
            self._link_objects[link] = self._supervisor.getMotor(link)

        # Get Touch sensor  (Added)
        self._touch_sensor_names = ["touch sensor1", "touch sensor2"]
        #Added
        self._touch_sensor_object = {}
        for sensor in self._touch_sensor_names:
            self._touch_sensor_object[
                sensor] = self._supervisor.getTouchSensor(sensor)
            self._touch_sensor_object[sensor].enable(self._timestep)

        # Get sensors
        self._link_position_sensors = {}
        self._min_position = {}
        self._max_position = {}
        for link in self._link_names:
            self._link_position_sensors[link] = self._link_objects[
                link].getPositionSensor()
            self._link_position_sensors[link].enable(self._timestep)
            self._min_position[link] = self._link_objects[link].getMinPosition(
            )
            self._max_position[link] = self._link_objects[link].getMaxPosition(
            )

        self._supervisor.step(self._timestep)

        # self.camera = self._supervisor.getCamera("camera");
        # self.camera.enable(self._timestep);

        self._floor = 0
        self._finger = 0
        self._touch = 0

        self._i = 0

###### UTIL FUNCTIONS - START ######

    def get_links_num(self):
        return len(self._link_names)

    def set_timestep_simulation(self, step):
        self._timestep = step

    def set_objects_names(self, names):
        self._objects_names = names
        for obj in self._objects_names:
            self._objects[obj] = self._supervisor.getFromDef(obj)

    def set_finger_name(self, name):
        self.finger_names = name
        self.finger = self._supervisor.getFromDef(name)

    def get_link_positions(self):
        positions = []
        for link in self._link_names:
            positions.append(self._link_position_sensors[link].getValue())
        return np.array(positions)

    def set_link_positions(self, positions):
        assert (len(positions) == len(self._link_names))
        i = 0
        for link in self._link_names:
            self._link_objects[link].setPosition(positions[i])
            i = i + 1

    def get_objects_positions(self):
        obj_positions = {}
        for obj in self._objects_names:
            obj_positions[obj] = self._objects[obj].getPosition()
        return obj_positions

    def object_position(self):
        obj_position = self._objects[self._objects_names[0]].getPosition()
        return obj_position

    def get_state(self):
        state = []
        for link in self._link_names:
            state = state + [self._link_position_sensors[link].getValue()]
        for obj in self._objects_names:
            state = state + self._objects[obj].getPosition()
        return np.array(state)

    def object_finger_distance(self, object_name):

        finger_pos_00 = self.fingers["TouchSensor1"].getPosition()
        finger_pos_00_array = np.array(finger_pos_00)

        finger_pos_01 = self.fingers["TouchSensor2"].getPosition()
        finger_pos_01_array = np.array(finger_pos_01)

        obj_pos = self.get_objects_positions()[object_name]
        obj_pos_array = np.array(obj_pos)

        if (object_name == "ring" or object_name == "box"):
            finger_distances = [
                np.linalg.norm(finger_pos_00_array - obj_pos_array +
                               [0, 0, +0.019]),
                np.linalg.norm(finger_pos_01_array - obj_pos_array +
                               [0, 0, 0.019])
            ]
        else:
            finger_distances = [
                np.linalg.norm(finger_pos_00_array - obj_pos_array +
                               [0, 0, +0]),
                np.linalg.norm(finger_pos_01_array - obj_pos_array + [0, 0, 0])
            ]

        return np.array(finger_distances).mean()

    def randomizeEnvironment(self):
        object_notToGrasp = self._supervisor.getFromDef(self._objects_names[1])
        _translation = object_notToGrasp.getField("translation")
        array_pos = []
        array_pos.append(object_notToGrasp.getPosition())
        array_pos.append([1, 0, 1])
        array_pos.append([-0.15, 0.195, 0])
        rand = randrange(0, 3)
        new_position = array_pos[rand]
        _translation.setSFVec3f(new_position)

###### UTIL FUNCTIONS -  END  ######

###### GYM FUNCTIONS -  START  ######

    def step(self, action):
        new_state = []
        reward = 0
        done = False
        obs = []

        obs = self._get_obs()
        reward = self._compute_reward(obs, done)
        self.set_link_positions(action)
        self._supervisor.step(self._timestep)

        return new_state, reward, done, obs

    def _compute_reward(self, observations, done):
        sensor_mean = (observations["TOUCH_SENSORS"] / 200).mean()

        obj_pos = observations["OBJECT_POSITION"]
        floor_distance = self.alpha * np.exp(
            -(np.linalg.norm(obj_pos - self.desired_pos)**2) /
            (2 * dist_dev_alpha**2))

        finger_distance = self.beta * np.exp(-(
            (self.object_finger_distance(self._objects_names[0]) + 0.9)**2) /
                                             (2 * dist_dev_beta**2))

        touch = self.gamma * sensor_mean

        touch_distance = touch * np.exp(
            -(self.object_finger_distance(self._objects_names[0])**2) /
            (2 * dist_dev_gamma**2))
        self._num_rew += 1
        self._touch += touch_distance

        #reward = floor_distance                                                #Per spostamento
        reward = finger_distance + touch_distance  #Per presa

        self._floor += floor_distance
        self._finger += finger_distance
        self._touch += touch_distance

        self._i += 1
        '''
        print("------------------------")
        print("Floor Distance: ")
        print(floor_distance)
        print("Finger Distance")
        print(finger_distance)
        print("Reward: ")
        print(reward)
        print("------------------------")
        '''

        # print(
        #     "Dfl: %-10.8f Dfi: %-10.8f D*T: %-10.8f"
        #     % (floor_distance, finger_distance, touch_distance)
        # )

        return reward

    def _get_obs(self):
        joint_positions = self.get_link_positions()

        touch_sensors = np.array([
            np.linalg.norm(self._touch_sensor_object[sensor].getValues())
            for sensor in self._touch_sensor_names
        ])
        obj = self.object_position()
        obj_position = np.array(obj)

        obs = {
            "JOINT_POSITIONS": joint_positions,
            "TOUCH_SENSORS": touch_sensors,
            "OBJECT_POSITION": obj_position,
        }
        return obs

    def reset(self):

        #print("Reset")

        #@

        if (self._i != 0):
            print("floor: ", self._floor, "   finger: ", self._finger,
                  "   touch: ", self._touch)
        self._i = 0
        self._floor = 0
        self._finger = 0
        self._touch = 0
        self._supervisor.simulationReset()
        self._supervisor.simulationResetPhysics()
        self._supervisor.step(1)

        #self.randomizeEnvironment()

        for link in self._link_names:
            self._link_position_sensors[link].enable(self._timestep)

        for sensor in self._touch_sensor_names:
            self._touch_sensor_object[sensor].enable(self._timestep)

    def render(self, mode='human', close=False):
        pass

###### GYM FUNCTIONS -   END   ######

###### CLASSIFIER FUNCITIONS - START ######

    def _objectPositionClassifier(self, object_position,
                                  object_initial_position):
        object_position = np.array(object_position)
        object_initial_position = np.array(object_initial_position)
        diff = np.linalg.norm(object_position - object_initial_position)
        return diff <= 0.001

    def _fingerDistanceClassifier(self, finger_distance):
        return finger_distance <= 0.05

    def _desiredPosClassifier(self, object_position, desired_pos):
        diff = np.linalg.norm(object_position - desired_pos)
        return diff <= 0.03

    def _touchSensorsClassifier(self, touch_sensors):
        esito = True
        for touch_sensor in touch_sensors:
            esito = esito and (touch_sensor >= 300)
        return esito

    def _jointPositionClassifier(self, joint_positions):
        esito = True
        for joint_position in joint_positions:
            esito = esito and (abs(joint_position) <= 0.01)
        return esito

    def _getValuesFromSensors(self):
        obs = self._get_obs()
        values = {}
        i = 0
        obj_names = self._objects_names.copy()
        obj_names.sort()
        for obj_name in self._objects_names:
            values[i] = self._objectPositionClassifier(
                self.get_objects_positions()[obj_name],
                self.objects_initial_positions[obj_name])
            values[i + 1] = self._fingerDistanceClassifier(
                self.object_finger_distance(obj_name))
            values[i + 2] = self._desiredPosClassifier(
                self.get_objects_positions()[obj_name], self.desired_pos)
            i += 3
        values[i] = self._touchSensorsClassifier(obs["TOUCH_SENSORS"])
        values[i + 1] = self._jointPositionClassifier(
            self.get_link_positions())
        return values