def gym_init(self):

        log('[ENV] gym_init called')

        # Gym initialization
        # Actions - [gain_factor_l_hip_y, gain_factor_r_hip_y]
        # States - [torso_alpha, torso_beta, torso_gamma, d_torso_alpha, d_torso_beta, d_torso_gamma, torso_x, torso_y, d_torso_x, d_torso_y]

        # Set the max and min gain factor
        # The gain factor is multiplied with the joint gain
        self.gain_factor_max = 1.0
        self.gain_factor_min = 1.0

        # Initialize the gain factors
        self.gain_factor_l_hip_y = 1.0
        self.gain_factor_r_hip_y = 1.0

        # Initialize the action and observation spaces
        obs = np.array([np.inf] * 10)
        self.action_space = spaces.Box(low=np.array([0.75, 0.75]),
                                       high=np.array([1.0, 1.0]))
        self.observation_space = spaces.Box(-obs, obs)

        # Variable for the number of steps in the episode
        self.step_counter = 0
示例#2
0
    def _step(self, actions):

        # Set the actions
        self.oscillator_thread.self_action(actions)

        # Time for the actions to take effect
        time.sleep(1.0)

        # Make observation
        self._self_observe()

        # Calculate the reward
        # Since the robot starts at x=0,y=0 and faces the x-direction, the reward is calculated as -1.0*abs(y position)
        objpos = self.oscillator_thread.monitor_thread.objpos
        fallen = self.oscillator_thread.monitor_thread.fallen
        torso_orientation = self.oscillator_thread.monitor_thread.torso_euler_angles
        forward_x = objpos[0]
        deviation = objpos[1]
        torso_gamma = torso_orientation[2]

        reward = ALPHA * (-abs(deviation)) + BETA * forward_x + THETA * (
            -abs(torso_gamma))

        log('[ENV] Deviation: {0} X-distance: {1} Torso-gamma: {2}'.format(
            deviation, forward_x, torso_gamma))

        # Increment the step counter
        self.step_counter += 1

        # Large negative reward if the robot falls
        if fallen:
            reward -= 100.0

        return self.observation, reward, self.oscillator_thread.terminal, {}
示例#3
0
 def _close(self):
     log('[ENV] Stopping the environment')
     self.oscillator_thread.monitor_thread.stop()
     self.oscillator_thread.monitor_thread.join()
     # Close the VREP connection
     self.oscillator_thread.robot_handle.cleanup()
     self.oscillator_thread.stop()
     self.oscillator_thread.join()
    def destroy(self):

        log('[ENV] destroy called')

        # Stop the monitoring thread
        self.monitor_thread.stop()

        # Close the VREP connection
        self.robot_handle.cleanup()
    def _reset(self):

        log('[ENV] _reset called')

        self.monitor_thread.stop()
        self.matsuoka_init()
        self.gym_init()
        self._self_observe()  # Set the observation
        return self.observation
def force_sensor_callback(data):

    global prev_z_force, curr_z_force

    # Internal state variables of the pacemaker oscillator used as global variables
    global u1_1, u2_1, v1_1, v2_1, y1_1, y2_1, o_1, gain_1, bias_1

    # Retrieve the force plot_data as a string
    str_force_data = data.data

    # Convert into a numpy 2D array (8 rows, 3 columns)
    # Each row for 1 sensor (l1, l2, l3, l4, r1, r2, r3, r4)
    # Each row has x, y and z force values
    arr_force_data = np.fromstring(str_force_data.replace('{',
                                                          '').replace('}', ''),
                                   sep=',').reshape((8, 3))

    # The left foot sensor values (force)
    l1 = arr_force_data[0]  # toe
    l2 = arr_force_data[1]  # toe
    l3 = arr_force_data[2]  # heel
    l4 = arr_force_data[3]  # heel

    # The right foot sensor values (force)
    r1 = arr_force_data[4]  # toe
    r2 = arr_force_data[5]  # toe
    r3 = arr_force_data[6]  # heel
    r4 = arr_force_data[7]  # heel

    # Average z forces for left foot
    l_toe_avg_z = (l1[2] + l2[2]) / 2.0
    l_heel_avg_z = (l3[2] + l4[2]) / 2.0
    l_avg_z = (l1[2] + l2[2] + l3[2] + l4[2]) / 4.0

    # Average z forces for right foot
    r_toe_avg_z = (r1[2] + r2[2]) / 2.0
    r_heel_avg_z = (r3[2] + r4[2]) / 2.0
    r_avg_z = (r1[2] + r2[2] + r3[2] + r4[2]) / 4.0

    # Reset the phase only when the force goes from below the threshold to above threshold
    curr_z_force = r_heel_avg_z

    if prev_z_force < force_threshold <= curr_z_force:
        # When r_heel_avg_z goes above force_threshold reset the phase
        # Sleep for 0.2s for the change to take effect
        u1_1, u2_1, v1_1, v2_1, y1_1, y2_1, o_1, gain_1, bias_1 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0
        log('[ROS] **** Resetting phase ****')
        log('[ROS] prev_z_force: {0} curr_z_force: {1}'.format(
            prev_z_force, curr_z_force))
        time.sleep(0.1)

    prev_z_force = curr_z_force
示例#7
0
    def _reset(self):

        log('[ENV] Resetting the environment')

        self.oscillator_thread.monitor_thread.stop()
        self.oscillator_thread.monitor_thread.join()
        # Close the VREP connection
        self.oscillator_thread.robot_handle.cleanup()
        self.oscillator_thread.stop()
        self.oscillator_thread.join()
        self.__init__()
        self._self_observe()  # Set the observation
        return self.observation
示例#8
0
    def stop(self):
        """
        Sets the flag which will stop the thread

        :return: None
        """
        # Flag to stop the monitoring thread
        self.stop_flag = True

        # Close the monitoring vrep connection
        self.vrepio_obj.close()

        # Calculate the average foot step size
        self.calc_avg_footstep()

        # Calculate the variance of the torso orientation
        self.calc_var_orientation()

        # Store the results
        GaitEvalResult.fallen = self.fallen
        GaitEvalResult.up_time = self.up_time
        GaitEvalResult.x = self.x
        GaitEvalResult.y = abs(self.y)
        GaitEvalResult.avg_footstep = self.avg_footstep
        GaitEvalResult.var_torso_orientation_alpha = self.var_torso_orientation_alpha
        GaitEvalResult.var_torso_orientation_beta = self.var_torso_orientation_beta
        GaitEvalResult.var_torso_orientation_gamma = self.var_torso_orientation_gamma

        print 'Fall:{0}, ' \
              'Up_time:{1}, ' \
              'X-distance:{2}, ' \
              'Deviation (mag):{3}, ' \
              'Avg. step length:{4}, ' \
              'Torso orientation variance Alpha-Beta-Gamma:{5}|{6}|{7}'.format(self.fallen,
                                                                               self.up_time,
                                                                               self.x,
                                                                               abs(self.y),
                                                                               self.avg_footstep,
                                                                               self.var_torso_orientation_alpha,
                                                                               self.var_torso_orientation_beta,
                                                                               self.var_torso_orientation_gamma)

        log('[MON] Monitor finished')
示例#9
0
    def __init__(self):

        log('[ENV] Initilializing environment')

        # Gym initialization
        # Actions - [gain_factor_l_hip_y, gain_factor_r_hip_y]
        # States - [torso_alpha, torso_beta, torso_gamma, d_torso_alpha, d_torso_beta, d_torso_gamma, torso_x, torso_y, d_torso_x, d_torso_y]

        # Set the max and min gain factor
        # The gain factor is multiplied with the joint gain
        self.gain_factor_max = 1.0
        self.gain_factor_min = 1.0

        # Initialize the gain factors
        self.gain_factor_l_hip_y = 1.0
        self.gain_factor_r_hip_y = 1.0

        # Initialize the action and observation spaces
        obs = np.array([np.inf] * 10)
        self.action_space = spaces.Box(low=np.array([0.75, 0.75]),
                                       high=np.array([1.0, 1.0]))
        self.observation_space = spaces.Box(-obs, obs)

        # Initilize the observation variables
        self.observation = None

        # Variable for the number of steps in the episode
        self.step_counter = 0

        # Set the oscillator thread
        # This will initialize the VREP scene and set the robot to the starting position
        self.oscillator_thread = Oscillator3Thread()

        # Start the oscillator thread
        # This will start the matsuoka walk
        self.oscillator_thread.start()
    def matsuoka_init(self):

        # Set the home directory
        self.home_dir = os.path.expanduser('~')

        log('[ENV] matsuoka_init called')

        # Max time to walk (in seconds)
        self.max_walk_time = 20

        # Variables from the best chromosome
        position_vector = [
            0.7461913734531209, 0.8422944031253159, 0.07043758116681641,
            0.14236621222553963, 0.48893497409925746, 0.5980055418720059,
            0.740811806645801, -0.11618361090424223, 0.492832184960149,
            -0.2949145038394889, 0.175450703085948, -0.3419733470484183
        ]

        self.kf = position_vector[0]
        self.GAIN1 = position_vector[1]
        self.GAIN2 = position_vector[2]
        self.GAIN3 = position_vector[3]
        self.GAIN4 = position_vector[4]
        self.GAIN5 = position_vector[5]
        self.GAIN6 = position_vector[6]
        self.BIAS1 = position_vector[7]
        self.BIAS2 = position_vector[8]
        self.BIAS3 = position_vector[9]
        self.BIAS4 = position_vector[10]
        self.k = position_vector[11]

        # Create the robot handle
        # Try to connect to VREP
        try_counter = 0
        try_max = 5
        self.robot_handle = None
        while self.robot_handle is None:

            # Close existing connections if any
            pypot.vrep.close_all_connections()

            try:
                log('[ENV] Trying to create robot handle (attempt: {0} of {1})'
                    .format(try_counter, try_max))
                try_counter += 1
                self.robot_handle = Nico(
                    sync_sleep_time=0.1,
                    motor_config=os.path.join(
                        self.home_dir,
                        'computing/repositories/hierarchical_bipedal_controller/motor_configs/nico_humanoid_full_v1.json'
                    ),
                    vrep=True,
                    vrep_host='127.0.0.1',
                    vrep_port=19997,
                    vrep_scene=os.path.join(
                        self.home_dir,
                        'computing/repositories/hierarchical_bipedal_controller/vrep_scenes/NICO-Simplified-July2017_standing_Foot_sensors_v4_no_graphs.ttt'
                    ))

            except Exception, e:
                log('[ENV] Could not connect to VREP')
                log('[ENV] Error: {0}'.format(e.message))
                time.sleep(1.0)

            if try_counter > try_max:
                log('[ENV] Unable to create robot handle after {0} tries'.
                    format(try_max))
                exit(1)
    def _step(self, actions):

        # Run 10 iterations of matsuoka loop followed by 1 iteration of checking torso

        # Make observation, take action, calculate reward
        # Scale the actions so that they are within range
        actions = self.scale_actions(actions)

        log('Scaled actions: {}'.format(actions))

        # Set the gain factors
        self.gain_factor_l_hip_y = actions[0]
        self.gain_factor_r_hip_y = actions[1]

        #for t in np.arange(0.0, max_time, dt):

        for i in range(10):

            # Increment the up time variable
            self.up_t += self.lower_control_dt

            # Calculate the current angles of the l and r saggital hip joints
            self.feedback_angles = self.robot_handle.get_angles(
                ['l_hip_y', 'r_hip_y'])

            # Calculate next state of oscillator 1 (pacemaker)
            self.f1_1, self.f2_1 = 0.0, 0.0
            self.s1_1, self.s2_1 = 0.0, 0.0
            self.u1_1, self.u2_1, self.v1_1, self.v2_1, self.y1_1, self.y2_1, self.o_1 = self.oscillator_next(
                u1=self.u1_1,
                u2=self.u2_1,
                v1=self.v1_1,
                v2=self.v2_1,
                y1=self.y1_1,
                y2=self.y2_1,
                f1=self.f1_1,
                f2=self.f2_1,
                s1=self.s1_1,
                s2=self.s2_1,
                bias=self.bias_1,
                gain=self.gain_1,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 2
            # w_ij -> j=1 (oscillator 1) is master, i=2 (oscillator 2) is slave
            # Gain factor set by the higher level control is set here
            self.w_21 = 1.0
            self.f1_2, self.f2_2 = self.k * self.feedback_angles[
                'l_hip_y'], -self.k * self.feedback_angles['l_hip_y']
            self.s1_2, self.s2_2 = self.w_21 * self.u1_1, self.w_21 * self.u2_1  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_2, self.u2_2, self.v1_2, self.v2_2, self.y1_2, self.y2_2, self.o_2 = self.oscillator_next(
                u1=self.u1_2,
                u2=self.u2_2,
                v1=self.v1_2,
                v2=self.v2_2,
                y1=self.y1_2,
                y2=self.y2_2,
                f1=self.f1_2,
                f2=self.f2_2,
                s1=self.s1_2,
                s2=self.s2_2,
                bias=self.bias_2,
                gain=self.gain_factor_l_hip_y * self.gain_2,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 3
            # w_ij -> j=1 (oscillator 1) is master, i=3 (oscillator 3) is slave
            # Gain factor set by the higher level control is set here
            self.w_31 = -1.0
            self.f1_3, self.f2_3 = self.k * self.feedback_angles[
                'r_hip_y'], -self.k * self.feedback_angles['r_hip_y']
            self.s1_3, self.s2_3 = self.w_31 * self.u1_1, self.w_31 * self.u2_1  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_3, self.u2_3, self.v1_3, self.v2_3, self.y1_3, self.y2_3, self.o_3 = self.oscillator_next(
                u1=self.u1_3,
                u2=self.u2_3,
                v1=self.v1_3,
                v2=self.v2_3,
                y1=self.y1_3,
                y2=self.y2_3,
                f1=self.f1_3,
                f2=self.f2_3,
                s1=self.s1_3,
                s2=self.s2_3,
                bias=self.bias_3,
                gain=self.gain_factor_r_hip_y * self.gain_3,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 4
            # w_ij -> j=2 (oscillator 2) is master, i=4 (oscillator 4) is slave
            self.w_42 = -1.0
            self.f1_4, self.f2_4 = 0.0, 0.0
            self.s1_4, self.s2_4 = self.w_42 * self.u1_2, self.w_42 * self.u2_2  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_4, self.u2_4, self.v1_4, self.v2_4, self.y1_4, self.y2_4, self.o_4 = self.oscillator_next(
                u1=self.u1_4,
                u2=self.u2_4,
                v1=self.v1_4,
                v2=self.v2_4,
                y1=self.y1_4,
                y2=self.y2_4,
                f1=self.f1_4,
                f2=self.f2_4,
                s1=self.s1_4,
                s2=self.s2_4,
                bias=self.bias_4,
                gain=self.gain_4,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 5
            # w_ij -> j=3 (oscillator 3) is master, i=5 (oscillator 5) is slave
            self.w_53 = -1.0
            self.f1_5, self.f2_5 = 0.0, 0.0
            self.s1_5, self.s2_5 = self.w_53 * self.u1_3, self.w_53 * self.u2_3  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_5, self.u2_5, self.v1_5, self.v2_5, self.y1_5, self.y2_5, self.o_5 = self.oscillator_next(
                u1=self.u1_5,
                u2=self.u2_5,
                v1=self.v1_5,
                v2=self.v2_5,
                y1=self.y1_5,
                y2=self.y2_5,
                f1=self.f1_5,
                f2=self.f2_5,
                s1=self.s1_5,
                s2=self.s2_5,
                bias=self.bias_5,
                gain=self.gain_5,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 6
            # w_ij -> j=2 (oscillator 2) is master, i=6 (oscillator 6) is slave
            self.w_62 = -1.0
            self.f1_6, self.f2_6 = 0.0, 0.0
            self.s1_6, self.s2_6 = self.w_62 * self.u1_2, self.w_62 * self.u2_2  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_6, self.u2_6, self.v1_6, self.v2_6, self.y1_6, self.y2_6, self.o_6 = self.oscillator_next(
                u1=self.u1_6,
                u2=self.u2_6,
                v1=self.v1_6,
                v2=self.v2_6,
                y1=self.y1_6,
                y2=self.y2_6,
                f1=self.f1_6,
                f2=self.f2_6,
                s1=self.s1_6,
                s2=self.s2_6,
                bias=self.bias_6,
                gain=self.gain_6,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 7
            # w_ij -> j=3 (oscillator 3) is master, i=7 (oscillator 7) is slave
            self.w_73 = -1.0
            self.f1_7, self.f2_7 = 0.0, 0.0
            self.s1_7, self.s2_7 = self.w_73 * self.u1_3, self.w_73 * self.u2_3  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_7, self.u2_7, self.v1_7, self.v2_7, self.y1_7, self.y2_7, self.o_7 = self.oscillator_next(
                u1=self.u1_7,
                u2=self.u2_7,
                v1=self.v1_7,
                v2=self.v2_7,
                y1=self.y1_7,
                y2=self.y2_7,
                f1=self.f1_7,
                f2=self.f2_7,
                s1=self.s1_7,
                s2=self.s2_7,
                bias=self.bias_7,
                gain=self.gain_7,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 8
            # w_ij -> j=1 (oscillator 1) is master, i=8 (oscillator 8) is slave
            self.w_81 = 1.0
            self.f1_8, self.f2_8 = 0.0, 0.0
            self.s1_8, self.s2_8 = self.w_81 * self.u1_1, self.w_81 * self.u2_1  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_8, self.u2_8, self.v1_8, self.v2_8, self.y1_8, self.y2_8, self.o_8 = self.oscillator_next(
                u1=self.u1_8,
                u2=self.u2_8,
                v1=self.v1_8,
                v2=self.v2_8,
                y1=self.y1_8,
                y2=self.y2_8,
                f1=self.f1_8,
                f2=self.f2_8,
                s1=self.s1_8,
                s2=self.s2_8,
                bias=self.bias_8,
                gain=self.gain_8,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 9
            # w_ij -> j=8 (oscillator 8) is master, i=9 (oscillator 9) is slave
            self.w_98 = -1.0
            self.f1_9, self.f2_9 = 0.0, 0.0
            self.s1_9, self.s2_9 = self.w_98 * self.u1_8, self.w_98 * self.u2_8  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_9, self.u2_9, self.v1_9, self.v2_9, self.y1_9, self.y2_9, self.o_9 = self.oscillator_next(
                u1=self.u1_9,
                u2=self.u2_9,
                v1=self.v1_9,
                v2=self.v2_9,
                y1=self.y1_9,
                y2=self.y2_9,
                f1=self.f1_9,
                f2=self.f2_9,
                s1=self.s1_9,
                s2=self.s2_9,
                bias=self.bias_9,
                gain=self.gain_9,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 10
            # w_ij -> j=1 (oscillator 1) is master, i=10 (oscillator 10) is slave
            self.w_101 = 1.0
            self.f1_10, self.f2_10 = 0.0, 0.0
            self.s1_10, self.s2_10 = self.w_101 * self.u1_1, self.w_101 * self.u2_1  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_10, self.u2_10, self.v1_10, self.v2_10, self.y1_10, self.y2_10, self.o_10 = self.oscillator_next(
                u1=self.u1_10,
                u2=self.u2_10,
                v1=self.v1_10,
                v2=self.v2_10,
                y1=self.y1_10,
                y2=self.y2_10,
                f1=self.f1_10,
                f2=self.f2_10,
                s1=self.s1_10,
                s2=self.s2_10,
                bias=self.bias_10,
                gain=self.gain_10,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 11
            # w_ij -> j=10 (oscillator 10) is master, i=11 (oscillator 11) is slave
            self.w_1110 = -1.0
            self.f1_11, self.f2_11 = 0.0, 0.0
            self.s1_11, self.s2_11 = self.w_1110 * self.u1_10, self.w_1110 * self.u2_10  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_11, self.u2_11, self.v1_11, self.v2_11, self.y1_11, self.y2_11, self.o_11 = self.oscillator_next(
                u1=self.u1_11,
                u2=self.u2_11,
                v1=self.v1_11,
                v2=self.v2_11,
                y1=self.y1_11,
                y2=self.y2_11,
                f1=self.f1_11,
                f2=self.f2_11,
                s1=self.s1_11,
                s2=self.s2_11,
                bias=self.bias_11,
                gain=self.gain_11,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 12
            # w_ij -> j=1 (oscillator 1) is master, i=12 (oscillator 12) is slave
            self.w_121 = -1.0
            self.f1_12, self.f2_12 = 0.0, 0.0
            self.s1_12, self.s2_12 = self.w_121 * self.u1_1, self.w_121 * self.u2_1  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_12, self.u2_12, self.v1_12, self.v2_12, self.y1_12, self.y2_12, self.o_12 = self.oscillator_next(
                u1=self.u1_12,
                u2=self.u2_12,
                v1=self.v1_12,
                v2=self.v2_12,
                y1=self.y1_12,
                y2=self.y2_12,
                f1=self.f1_12,
                f2=self.f2_12,
                s1=self.s1_12,
                s2=self.s2_12,
                bias=self.bias_12,
                gain=self.gain_12,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 13
            # w_ij -> j=1 (oscillator 1) is master, i=13 (oscillator 13) is slave
            self.w_131 = 1.0
            self.f1_13, self.f2_13 = 0.0, 0.0
            self.s1_13, self.s2_13 = self.w_131 * self.u1_1, self.w_131 * self.u2_1  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_13, self.u2_13, self.v1_13, self.v2_13, self.y1_13, self.y2_13, self.o_13 = self.oscillator_next(
                u1=self.u1_13,
                u2=self.u2_13,
                v1=self.v1_13,
                v2=self.v2_13,
                y1=self.y1_13,
                y2=self.y2_13,
                f1=self.f1_13,
                f2=self.f2_13,
                s1=self.s1_13,
                s2=self.s2_13,
                bias=self.bias_13,
                gain=self.gain_13,
                dt=self.lower_control_dt)

            # Set the joint positions
            self.current_angles = {
                'l_hip_y': self.o_2,
                'r_hip_y': self.o_3,
                'l_knee_y': self.o_4,
                'r_knee_y': self.o_5,
                'l_ankle_y': self.o_6,
                'r_ankle_y': self.o_7,
                'l_hip_x': self.o_8,
                'l_ankle_x': self.o_9,
                'r_hip_x': self.o_10,
                'r_ankle_x': self.o_11,
                'l_shoulder_y': self.o_12,
                'r_shoulder_y': self.o_13
            }
            self.robot_handle.set_angles(self.current_angles)

            time.sleep(self.lower_control_dt)

            # Check if the robot has fallen
            if self.monitor_thread.fallen:
                break

        # Make observation
        self._self_observe()

        # Calculate the reward
        # Since the robot starts at x=0,y=0 and faces the x-direction, the reward is calculated as -1.0*abs(y position)
        reward = -1.0 * abs(self.monitor_thread.y)

        # Increment the step counter
        self.step_counter += 1

        # Check if the current state is terminal
        terminal = False
        if self.monitor_thread.fallen or not (self.up_t < self.max_walk_time):
            terminal = True

        # Large negative reward if the robot falls
        if self.monitor_thread.fallen:
            reward -= 100.0

        # No need to put a sleep here, it is taken care of inside the matsuoka loop

        return self.observation, reward, terminal, {}
    def __init__(self):

        # Call init of superclass
        threading.Thread.__init__(self)

        # Set the home directory
        self.home_dir = os.path.expanduser('~')

        # Set the name of the VREP object
        self.vrep_body_object = 'torso_11_respondable'

        # This is the lowest possible gain that can be set, the highest possible gain in 1.0 (no change)
        self.lowest_possible_gain = LOWEST_POSSIBLE_GAIN

        # Initially the 2 gain factors are set to 1.0 (no change)
        # These gain factors will be set by the RL algorithm (using the action method)
        self.gain_factor_l_hip_y = 1.0
        self.gain_factor_r_hip_y = 1.0

        # Variable to indicate if terminal state has been reached
        self.terminal = False

        # Variable to stop the thread
        self.stop_thread = False

        # Max time to walk (in seconds)
        self.max_walk_time = MAX_WALK_TIME

        # Variables from the best chromosome
        position_vector = BEST_CHROMOSOME

        self.kf = position_vector[0]
        self.GAIN1 = position_vector[1]
        self.GAIN2 = position_vector[2]
        self.GAIN3 = position_vector[3]
        self.GAIN4 = position_vector[4]
        self.GAIN5 = position_vector[5]
        self.GAIN6 = position_vector[6]
        self.BIAS1 = position_vector[7]
        self.BIAS2 = position_vector[8]
        self.BIAS3 = position_vector[9]
        self.BIAS4 = position_vector[10]
        self.k = position_vector[11]

        # Create the robot handle
        # Try to connect to VREP
        try_counter = 0
        try_max = 5
        self.robot_handle = None
        while self.robot_handle is None:

            # Close existing connections if any
            pypot.vrep.close_all_connections()

            try:
                log('[OSC] Trying to create robot handle (attempt: {0} of {1})'
                    .format(try_counter, try_max))
                try_counter += 1
                self.robot_handle = Nico(
                    sync_sleep_time=0.1,
                    motor_config=os.path.join(
                        self.home_dir,
                        'computing/repositories/hierarchical_bipedal_controller/motor_configs/nico_humanoid_full_v1.json'
                    ),
                    vrep=True,
                    vrep_host='127.0.0.1',
                    vrep_port=19997,
                    vrep_scene=os.path.join(
                        self.home_dir,
                        'computing/repositories/hierarchical_bipedal_controller/vrep_scenes/NICO-Simplified-July2017_standing_Foot_sensors_v4_no_graphs_with_path.ttt'
                    ))

            except Exception, e:
                log('[OSC] Could not connect to VREP')
                log('[OSC] Error: {0}'.format(e.message))
                time.sleep(1.0)

            if try_counter > try_max:
                log('[OSC] Unable to create robot handle after {0} tries'.
                    format(try_max))
                exit(1)
示例#13
0
                vrep_scene=os.path.join(
                    home_dir,
                    'computing/repositories/hierarchical_bipedal_controller/vrep_scenes/NICO-Simplified-July2017_standing_Foot_sensors_v4_no_graphs.ttt'
                ))
        except Exception, e:
            log('[OSC] Could not connect to VREP')
            log('[OSC] Error: {0}'.format(e.message))
            time.sleep(1.0)

        if try_counter > try_max:
            log('[OSC] Unable to create robot handle after {0} tries'.format(
                try_max))
            exit(1)

    if robot_handle is not None:
        log('[OSC] Successfully connected to VREP')

    # Start the monitoring thread
    monitor_thread = RobotMonitorThread(portnum=19998,
                                        objname='torso_11_respondable',
                                        height_threshold=0.3)
    monitor_thread.start()
    log('[OSC] Started monitoring thread')

    # Wait 1s for the monitoring thread
    time.sleep(1.0)

    # Note the current position
    start_pos_x = monitor_thread.x
    start_pos_y = monitor_thread.y
    start_pos_z = monitor_thread.z
示例#14
0
                vrep_scene=os.path.join(
                    home_dir,
                    'computing/repositories/hierarchical_bipedal_controller/vrep_scenes/NICO-Simplified-July2017_standing_Foot_sensors_v4_no_graphs.ttt'
                ))
        except Exception, e:
            log('[OSC] Could not connect to VREP')
            log('[OSC] Error: {0}'.format(e.message))
            time.sleep(1.0)

        if try_counter > try_max:
            log('[OSC] Unable to create robot handle after {0} tries'.format(
                try_max))
            exit(1)

    if robot_handle is not None:
        log('[OSC] Successfully connected to VREP')

    # Start the monitoring thread
    monitor_thread = RobotMonitorThread(portnum=19998,
                                        objname='torso_11_respondable',
                                        height_threshold=0.3)
    monitor_thread.start()
    log('[OSC] Started monitoring thread')

    # Note the current position
    start_pos_x = monitor_thread.x
    start_pos_y = monitor_thread.y
    start_pos_z = monitor_thread.z

    # Wait 1s for the monitoring thread
    time.sleep(1.0)
    def run(self):

        for t in np.arange(0.0, self.max_walk_time, self.lower_control_dt):

            # Increment the up time variable
            self.up_t += self.lower_control_dt

            # Calculate the current angles of the l and r saggital hip joints
            self.feedback_angles = self.robot_handle.get_angles(
                ['l_hip_y', 'r_hip_y'])

            # For verification - print every 100 iterations
            #if t in np.arange(0.0, self.max_walk_time, self.lower_control_dt*100):
            #log('[OSC] Feedback angles: {}'.format(self.feedback_angles))
            #log('[OSC] Gain values: {}'.format((self.gain_factor_l_hip_y, self.gain_factor_r_hip_y)))

            # Calculate next state of oscillator 1 (pacemaker)
            self.f1_1, self.f2_1 = 0.0, 0.0
            self.s1_1, self.s2_1 = 0.0, 0.0
            self.u1_1, self.u2_1, self.v1_1, self.v2_1, self.y1_1, self.y2_1, self.o_1 = self.oscillator_next(
                u1=self.u1_1,
                u2=self.u2_1,
                v1=self.v1_1,
                v2=self.v2_1,
                y1=self.y1_1,
                y2=self.y2_1,
                f1=self.f1_1,
                f2=self.f2_1,
                s1=self.s1_1,
                s2=self.s2_1,
                bias=self.bias_1,
                gain=self.gain_1,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 2
            # w_ij -> j=1 (oscillator 1) is master, i=2 (oscillator 2) is slave
            # Gain factor set by the higher level control is set here
            self.w_21 = 1.0
            self.f1_2, self.f2_2 = self.k * self.feedback_angles[
                'l_hip_y'], -self.k * self.feedback_angles['l_hip_y']
            self.s1_2, self.s2_2 = self.w_21 * self.u1_1, self.w_21 * self.u2_1  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_2, self.u2_2, self.v1_2, self.v2_2, self.y1_2, self.y2_2, self.o_2 = self.oscillator_next(
                u1=self.u1_2,
                u2=self.u2_2,
                v1=self.v1_2,
                v2=self.v2_2,
                y1=self.y1_2,
                y2=self.y2_2,
                f1=self.f1_2,
                f2=self.f2_2,
                s1=self.s1_2,
                s2=self.s2_2,
                bias=self.bias_2,
                gain=self.gain_factor_l_hip_y * self.gain_2,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 3
            # w_ij -> j=1 (oscillator 1) is master, i=3 (oscillator 3) is slave
            # Gain factor set by the higher level control is set here
            self.w_31 = -1.0
            self.f1_3, self.f2_3 = self.k * self.feedback_angles[
                'r_hip_y'], -self.k * self.feedback_angles['r_hip_y']
            self.s1_3, self.s2_3 = self.w_31 * self.u1_1, self.w_31 * self.u2_1  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_3, self.u2_3, self.v1_3, self.v2_3, self.y1_3, self.y2_3, self.o_3 = self.oscillator_next(
                u1=self.u1_3,
                u2=self.u2_3,
                v1=self.v1_3,
                v2=self.v2_3,
                y1=self.y1_3,
                y2=self.y2_3,
                f1=self.f1_3,
                f2=self.f2_3,
                s1=self.s1_3,
                s2=self.s2_3,
                bias=self.bias_3,
                gain=self.gain_factor_r_hip_y * self.gain_3,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 4
            # w_ij -> j=2 (oscillator 2) is master, i=4 (oscillator 4) is slave
            self.w_42 = -1.0
            self.f1_4, self.f2_4 = 0.0, 0.0
            self.s1_4, self.s2_4 = self.w_42 * self.u1_2, self.w_42 * self.u2_2  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_4, self.u2_4, self.v1_4, self.v2_4, self.y1_4, self.y2_4, self.o_4 = self.oscillator_next(
                u1=self.u1_4,
                u2=self.u2_4,
                v1=self.v1_4,
                v2=self.v2_4,
                y1=self.y1_4,
                y2=self.y2_4,
                f1=self.f1_4,
                f2=self.f2_4,
                s1=self.s1_4,
                s2=self.s2_4,
                bias=self.bias_4,
                gain=self.gain_4,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 5
            # w_ij -> j=3 (oscillator 3) is master, i=5 (oscillator 5) is slave
            self.w_53 = -1.0
            self.f1_5, self.f2_5 = 0.0, 0.0
            self.s1_5, self.s2_5 = self.w_53 * self.u1_3, self.w_53 * self.u2_3  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_5, self.u2_5, self.v1_5, self.v2_5, self.y1_5, self.y2_5, self.o_5 = self.oscillator_next(
                u1=self.u1_5,
                u2=self.u2_5,
                v1=self.v1_5,
                v2=self.v2_5,
                y1=self.y1_5,
                y2=self.y2_5,
                f1=self.f1_5,
                f2=self.f2_5,
                s1=self.s1_5,
                s2=self.s2_5,
                bias=self.bias_5,
                gain=self.gain_5,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 6
            # w_ij -> j=2 (oscillator 2) is master, i=6 (oscillator 6) is slave
            self.w_62 = -1.0
            self.f1_6, self.f2_6 = 0.0, 0.0
            self.s1_6, self.s2_6 = self.w_62 * self.u1_2, self.w_62 * self.u2_2  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_6, self.u2_6, self.v1_6, self.v2_6, self.y1_6, self.y2_6, self.o_6 = self.oscillator_next(
                u1=self.u1_6,
                u2=self.u2_6,
                v1=self.v1_6,
                v2=self.v2_6,
                y1=self.y1_6,
                y2=self.y2_6,
                f1=self.f1_6,
                f2=self.f2_6,
                s1=self.s1_6,
                s2=self.s2_6,
                bias=self.bias_6,
                gain=self.gain_6,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 7
            # w_ij -> j=3 (oscillator 3) is master, i=7 (oscillator 7) is slave
            self.w_73 = -1.0
            self.f1_7, self.f2_7 = 0.0, 0.0
            self.s1_7, self.s2_7 = self.w_73 * self.u1_3, self.w_73 * self.u2_3  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_7, self.u2_7, self.v1_7, self.v2_7, self.y1_7, self.y2_7, self.o_7 = self.oscillator_next(
                u1=self.u1_7,
                u2=self.u2_7,
                v1=self.v1_7,
                v2=self.v2_7,
                y1=self.y1_7,
                y2=self.y2_7,
                f1=self.f1_7,
                f2=self.f2_7,
                s1=self.s1_7,
                s2=self.s2_7,
                bias=self.bias_7,
                gain=self.gain_7,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 8
            # w_ij -> j=1 (oscillator 1) is master, i=8 (oscillator 8) is slave
            self.w_81 = 1.0
            self.f1_8, self.f2_8 = 0.0, 0.0
            self.s1_8, self.s2_8 = self.w_81 * self.u1_1, self.w_81 * self.u2_1  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_8, self.u2_8, self.v1_8, self.v2_8, self.y1_8, self.y2_8, self.o_8 = self.oscillator_next(
                u1=self.u1_8,
                u2=self.u2_8,
                v1=self.v1_8,
                v2=self.v2_8,
                y1=self.y1_8,
                y2=self.y2_8,
                f1=self.f1_8,
                f2=self.f2_8,
                s1=self.s1_8,
                s2=self.s2_8,
                bias=self.bias_8,
                gain=self.gain_8,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 9
            # w_ij -> j=8 (oscillator 8) is master, i=9 (oscillator 9) is slave
            self.w_98 = -1.0
            self.f1_9, self.f2_9 = 0.0, 0.0
            self.s1_9, self.s2_9 = self.w_98 * self.u1_8, self.w_98 * self.u2_8  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_9, self.u2_9, self.v1_9, self.v2_9, self.y1_9, self.y2_9, self.o_9 = self.oscillator_next(
                u1=self.u1_9,
                u2=self.u2_9,
                v1=self.v1_9,
                v2=self.v2_9,
                y1=self.y1_9,
                y2=self.y2_9,
                f1=self.f1_9,
                f2=self.f2_9,
                s1=self.s1_9,
                s2=self.s2_9,
                bias=self.bias_9,
                gain=self.gain_9,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 10
            # w_ij -> j=1 (oscillator 1) is master, i=10 (oscillator 10) is slave
            self.w_101 = 1.0
            self.f1_10, self.f2_10 = 0.0, 0.0
            self.s1_10, self.s2_10 = self.w_101 * self.u1_1, self.w_101 * self.u2_1  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_10, self.u2_10, self.v1_10, self.v2_10, self.y1_10, self.y2_10, self.o_10 = self.oscillator_next(
                u1=self.u1_10,
                u2=self.u2_10,
                v1=self.v1_10,
                v2=self.v2_10,
                y1=self.y1_10,
                y2=self.y2_10,
                f1=self.f1_10,
                f2=self.f2_10,
                s1=self.s1_10,
                s2=self.s2_10,
                bias=self.bias_10,
                gain=self.gain_10,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 11
            # w_ij -> j=10 (oscillator 10) is master, i=11 (oscillator 11) is slave
            self.w_1110 = -1.0
            self.f1_11, self.f2_11 = 0.0, 0.0
            self.s1_11, self.s2_11 = self.w_1110 * self.u1_10, self.w_1110 * self.u2_10  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_11, self.u2_11, self.v1_11, self.v2_11, self.y1_11, self.y2_11, self.o_11 = self.oscillator_next(
                u1=self.u1_11,
                u2=self.u2_11,
                v1=self.v1_11,
                v2=self.v2_11,
                y1=self.y1_11,
                y2=self.y2_11,
                f1=self.f1_11,
                f2=self.f2_11,
                s1=self.s1_11,
                s2=self.s2_11,
                bias=self.bias_11,
                gain=self.gain_11,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 12
            # w_ij -> j=1 (oscillator 1) is master, i=12 (oscillator 12) is slave
            self.w_121 = -1.0
            self.f1_12, self.f2_12 = 0.0, 0.0
            self.s1_12, self.s2_12 = self.w_121 * self.u1_1, self.w_121 * self.u2_1  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_12, self.u2_12, self.v1_12, self.v2_12, self.y1_12, self.y2_12, self.o_12 = self.oscillator_next(
                u1=self.u1_12,
                u2=self.u2_12,
                v1=self.v1_12,
                v2=self.v2_12,
                y1=self.y1_12,
                y2=self.y2_12,
                f1=self.f1_12,
                f2=self.f2_12,
                s1=self.s1_12,
                s2=self.s2_12,
                bias=self.bias_12,
                gain=self.gain_12,
                dt=self.lower_control_dt)

            # Calculate next state of oscillator 13
            # w_ij -> j=1 (oscillator 1) is master, i=13 (oscillator 13) is slave
            self.w_131 = 1.0
            self.f1_13, self.f2_13 = 0.0, 0.0
            self.s1_13, self.s2_13 = self.w_131 * self.u1_1, self.w_131 * self.u2_1  # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j
            self.u1_13, self.u2_13, self.v1_13, self.v2_13, self.y1_13, self.y2_13, self.o_13 = self.oscillator_next(
                u1=self.u1_13,
                u2=self.u2_13,
                v1=self.v1_13,
                v2=self.v2_13,
                y1=self.y1_13,
                y2=self.y2_13,
                f1=self.f1_13,
                f2=self.f2_13,
                s1=self.s1_13,
                s2=self.s2_13,
                bias=self.bias_13,
                gain=self.gain_13,
                dt=self.lower_control_dt)

            # Set the joint positions
            self.current_angles = {
                'l_hip_y': self.o_2,
                'r_hip_y': self.o_3,
                'l_knee_y': self.o_4,
                'r_knee_y': self.o_5,
                'l_ankle_y': self.o_6,
                'r_ankle_y': self.o_7,
                'l_hip_x': self.o_8,
                'l_ankle_x': self.o_9,
                'r_hip_x': self.o_10,
                'r_ankle_x': self.o_11,
                'l_shoulder_y': self.o_12,
                'r_shoulder_y': self.o_13
            }
            self.robot_handle.set_angles(self.current_angles)

            time.sleep(self.lower_control_dt)

            # Check if the robot has fallen
            if self.monitor_thread.fallen:
                log('[OSC] Robot has fallen')
                break

        # Outside the loop it means that either time is over or robot has fallen
        self.terminal = True

        log('[OSC] Oscillator thread up time: {}'.format(self.up_t))

        # Run until stopped
        while not self.stop_thread:
            time.sleep(1.0)
示例#16
0
def calc_fitness(start_x, start_y, start_z, end_x, end_y, end_z, avg_z,
                 up_time, fitness_option):

    log('[FIT] Executing calc_fitness with fitness_option: {0}'.format(
        fitness_option))
    log('[FIT] Other arguments:')
    log('[FIT] start_x={0}, start_y={1}, start_z={2}'.format(
        start_x, start_y, start_z))
    log('[FIT] end_x={0}, end_y={1}, end_z={2}'.format(end_x, end_y, end_z))
    log('[FIT] avg_z={0}'.format(avg_z))
    log('[FIT] up_time={0}'.format(up_time))

    x_distance = end_x - start_x
    y_distance = end_y - start_y

    x_vel = x_distance / up_time  # (metres/second)

    fitness = 0.0

    if fitness_option == 1:
        # Fitness is the distance moved in the x direction (metres)
        fitness = x_distance
        log('[FIT] fitness = x_distance')

    elif fitness_option == 2:
        # Fitness is the distance moved in the x direction (metres) + up_time (minutes)
        fitness = x_distance + up_time / 60.0
        log('[FIT] fitness = x_distance + up_time/60.0')

    elif fitness_option == 3:
        # Fitness is the distance moved in the x direction * 0.3 (metres) + up_time (seconds) * 0.7
        # This formula yielded the stable walk in open loop
        fitness = x_distance * 0.3 + up_time * 0.7
        log('[FIT] fitness = x_distance*0.3 + up_time*0.7')

    elif fitness_option == 4:
        # Fitness is the straight line velocity minus a penalty for deviating from the straight line
        # Follows the formula in Cristiano's paper (coefficient values are aplha=80, gamma=100)
        fitness = 80.0 * x_vel - 100 * abs(y_distance)
        log('[FIT] fitness = 80.0*x_vel - 100*abs(y_distance)')

    elif fitness_option == 5:
        # Fitness is the sum of x-distance(m), time(minutes), x-vel(m/s)
        fitness = x_distance + (up_time / 60.0) + x_vel
        log('[FIT] fitness = x_distance + (up_time/60.0) + x_vel')

    elif fitness_option == 6:
        # Fitness is the distance moved in the x direction (metres) + up_time (seconds)*0.5
        # This formula yielded the stable walk in open loop
        fitness = x_distance + up_time * 0.5
        log('[FIT] fitness = x_distance + up_time*0.5')

    return fitness
示例#17
0
    def __init__(self, portnum, objname, height_threshold, force_threshold=10):
        """
        Initializes the RobotMonitorThread thread

        :param portnum: Port number on which the VREP remote API is listening on the server
        (different than the one used to run the main robot simulation)
        :param objname: Object name which is to be monitored
        :height_threshold: If the object's height is lower than this value, the robot is considered to have falen
        """

        # Call init of super class
        Thread.__init__(self)

        # Setup the log
        home_dir = os.path.expanduser('~')
        log('[MON] Monitor started')

        # Create a VrepIO object using a different port number than the one used to run the main robot simulation
        # Make sure that the VREP remote api on the server is listening on this port number
        # Additional ports for listening can be set up on the server side by editing the file remoteApiConnections.txt
        self.vrepio_obj = VrepIO(vrep_port=portnum)

        # The object to be monitored
        self.objname = objname

        # Threshold for falling
        self.height_threshold = height_threshold

        # Threshold for force sensor
        self.force_threshold = force_threshold

        # The position of the object
        self.objpos = None

        # Initialize the foot sensor forces
        self.l1 = 0.0
        self.l2 = 0.0
        self.l3 = 0.0
        self.l4 = 0.0
        self.l_heel_current = 0.0
        self.l_heel_previous = 0.0
        
        self.r1 = 0.0
        self.r2 = 0.0
        self.r3 = 0.0
        self.r4 = 0.0
        self.r_heel_current = 0.0
        self.r_heel_previous = 0.0

        # Lists to hold the foot positions as ('foot type',x,y) tuples
        self.foot_position = list()

        # Variable to store average foot step
        self.avg_footstep = 0.0

        # Flag for phase reset
        self.phase_reset = False


        # The x,y,z coordinates of the position
        self.x = None
        self.y = None
        self.z = None

        # The starting time
        self.start_time = time.time()
        self.up_time = 0.0

        # The average height of the robot
        self.avg_z = 0.0

        # A list to store the height at each second
        self.z_list = list()

        # A flag which can stop the thread
        self.stop_flag = False

        # Lists to store torso orientations
        self.torso_orientation_alpha = list()
        self.torso_orientation_beta = list()
        self.torso_orientation_gamma = list()

        # Variables to store the torso orientation variance
        self.var_torso_orientation_alpha = 0.0
        self.var_torso_orientation_beta = 0.0
        self.var_torso_orientation_gamma = 0.0

        # A flag to indicate if the robot has fallen
        self.fallen = False
class MatsuokaAngleFeedbackEnv(gym.Env):
    def gym_init(self):

        log('[ENV] gym_init called')

        # Gym initialization
        # Actions - [gain_factor_l_hip_y, gain_factor_r_hip_y]
        # States - [torso_alpha, torso_beta, torso_gamma, d_torso_alpha, d_torso_beta, d_torso_gamma, torso_x, torso_y, d_torso_x, d_torso_y]

        # Set the max and min gain factor
        # The gain factor is multiplied with the joint gain
        self.gain_factor_max = 1.0
        self.gain_factor_min = 1.0

        # Initialize the gain factors
        self.gain_factor_l_hip_y = 1.0
        self.gain_factor_r_hip_y = 1.0

        # Initialize the action and observation spaces
        obs = np.array([np.inf] * 10)
        self.action_space = spaces.Box(low=np.array([0.75, 0.75]),
                                       high=np.array([1.0, 1.0]))
        self.observation_space = spaces.Box(-obs, obs)

        # Variable for the number of steps in the episode
        self.step_counter = 0

    def scale_actions(self, actions):
        """
        Scales the actions to be withing the gain factor limits
        The actions are squashed using a sigmoid activation function but due to addition of noise turing training, their
        values may lie outside the [0,1] range
        0 and below is set to gain_factor_min
        1 and above is set to gain_factor_max
        In-between values are scaled to a value between max and min limits
        :param actions:
        :return: scaled actions
        """

        action_1 = actions[0]
        action_2 = actions[1]
        scaled_action_1 = 0.0
        scaled_action_2 = 0.0

        if action_1 <= 0.0:
            scaled_action_1 = self.gain_factor_min
        elif action_1 >= 1.0:
            scaled_action_1 = self.gain_factor_max
        elif 0.0 < action_1 < 1.0:
            scaled_action_1 = self.gain_factor_min + (
                (action_1 - 0.0) /
                (1.0 - 0.0)) * (self.gain_factor_max - self.gain_factor_min)

        if action_2 <= 0.0:
            scaled_action_2 = self.gain_factor_min
        elif action_2 >= 1.0:
            scaled_action_2 = self.gain_factor_max
        elif 0.0 < action_2 < 1.0:
            scaled_action_2 = self.gain_factor_min + (
                (action_2 - 0.0) /
                (1.0 - 0.0)) * (self.gain_factor_max - self.gain_factor_min)

        return [scaled_action_1, scaled_action_2]

    def matsuoka_init(self):

        # Set the home directory
        self.home_dir = os.path.expanduser('~')

        log('[ENV] matsuoka_init called')

        # Max time to walk (in seconds)
        self.max_walk_time = 20

        # Variables from the best chromosome
        position_vector = [
            0.7461913734531209, 0.8422944031253159, 0.07043758116681641,
            0.14236621222553963, 0.48893497409925746, 0.5980055418720059,
            0.740811806645801, -0.11618361090424223, 0.492832184960149,
            -0.2949145038394889, 0.175450703085948, -0.3419733470484183
        ]

        self.kf = position_vector[0]
        self.GAIN1 = position_vector[1]
        self.GAIN2 = position_vector[2]
        self.GAIN3 = position_vector[3]
        self.GAIN4 = position_vector[4]
        self.GAIN5 = position_vector[5]
        self.GAIN6 = position_vector[6]
        self.BIAS1 = position_vector[7]
        self.BIAS2 = position_vector[8]
        self.BIAS3 = position_vector[9]
        self.BIAS4 = position_vector[10]
        self.k = position_vector[11]

        # Create the robot handle
        # Try to connect to VREP
        try_counter = 0
        try_max = 5
        self.robot_handle = None
        while self.robot_handle is None:

            # Close existing connections if any
            pypot.vrep.close_all_connections()

            try:
                log('[ENV] Trying to create robot handle (attempt: {0} of {1})'
                    .format(try_counter, try_max))
                try_counter += 1
                self.robot_handle = Nico(
                    sync_sleep_time=0.1,
                    motor_config=os.path.join(
                        self.home_dir,
                        'computing/repositories/hierarchical_bipedal_controller/motor_configs/nico_humanoid_full_v1.json'
                    ),
                    vrep=True,
                    vrep_host='127.0.0.1',
                    vrep_port=19997,
                    vrep_scene=os.path.join(
                        self.home_dir,
                        'computing/repositories/hierarchical_bipedal_controller/vrep_scenes/NICO-Simplified-July2017_standing_Foot_sensors_v4_no_graphs.ttt'
                    ))

            except Exception, e:
                log('[ENV] Could not connect to VREP')
                log('[ENV] Error: {0}'.format(e.message))
                time.sleep(1.0)

            if try_counter > try_max:
                log('[ENV] Unable to create robot handle after {0} tries'.
                    format(try_max))
                exit(1)

        if self.robot_handle is not None:
            log('[ENV] Successfully connected to VREP')

        # Start the monitoring thread
        self.monitor_thread = RobotMonitorThread(
            portnum=19998,
            objname='torso_11_respondable',
            height_threshold=0.3)
        self.monitor_thread.start()
        log('[ENV] Started monitoring thread')

        # Wait 1s for the monitoring thread
        time.sleep(1.0)

        # Note the current position
        self.start_pos_x = self.monitor_thread.x
        self.start_pos_y = self.monitor_thread.y
        self.start_pos_z = self.monitor_thread.z

        # Strange error handler
        if self.start_pos_y is None:
            self.start_pos_x = 0.0
        if self.start_pos_y is None:
            self.start_pos_y = 0.0
        if self.start_pos_z is None:
            self.start_pos_z = 0.0

        # Set up the oscillator constants
        self.tau = 0.2800
        self.tau_prime = 0.4977
        self.beta = 2.5000
        self.w_0 = 2.2829
        self.u_e = 0.4111
        self.m1 = 1.0
        self.m2 = 1.0
        self.a = 1.0

        # Modify the time constants based on kf
        self.tau *= self.kf
        self.tau_prime *= self.kf

        # Step times
        self.higher_control_dt = 0.1
        self.lower_control_dt = 0.01

        # Set the oscillator variables
        # Oscillator 1 (pacemaker)
        self.u1_1, self.u2_1, self.v1_1, self.v2_1, self.y1_1, self.y2_1, self.o_1, self.gain_1, self.bias_1 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0
        # Oscillator 2
        self.u1_2, self.u2_2, self.v1_2, self.v2_2, self.y1_2, self.y2_2, self.o_2, self.gain_2, self.bias_2 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN1, self.BIAS1
        # Oscillator 3
        self.u1_3, self.u2_3, self.v1_3, self.v2_3, self.y1_3, self.y2_3, self.o_3, self.gain_3, self.bias_3 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN1, self.BIAS1
        # Oscillator 4
        self.u1_4, self.u2_4, self.v1_4, self.v2_4, self.y1_4, self.y2_4, self.o_4, self.gain_4, self.bias_4 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN3, self.BIAS2
        # Oscillator 5
        self.u1_5, self.u2_5, self.v1_5, self.v2_5, self.y1_5, self.y2_5, self.o_5, self.gain_5, self.bias_5 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN3, self.BIAS2
        # Oscillator 6
        self.u1_6, self.u2_6, self.v1_6, self.v2_6, self.y1_6, self.y2_6, self.o_6, self.gain_6, self.bias_6 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN2, self.BIAS3
        # Oscillator 7
        self.u1_7, self.u2_7, self.v1_7, self.v2_7, self.y1_7, self.y2_7, self.o_7, self.gain_7, self.bias_7 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN2, self.BIAS3
        # Oscillator 8
        self.u1_8, self.u2_8, self.v1_8, self.v2_8, self.y1_8, self.y2_8, self.o_8, self.gain_8, self.bias_8 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN4, 0.0
        # Oscillator 9
        self.u1_9, self.u2_9, self.v1_9, self.v2_9, self.y1_9, self.y2_9, self.o_9, self.gain_9, self.bias_9 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN5, 0.0
        # Oscillator 10
        self.u1_10, self.u2_10, self.v1_10, self.v2_10, self.y1_10, self.y2_10, self.o_10, self.gain_10, self.bias_10 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN4, 0.0
        # Oscillator 11
        self.u1_11, self.u2_11, self.v1_11, self.v2_11, self.y1_11, self.y2_11, self.o_11, self.gain_11, self.bias_11 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN5, 0.0
        # Oscillator 12
        self.u1_12, self.u2_12, self.v1_12, self.v2_12, self.y1_12, self.y2_12, self.o_12, self.gain_12, self.bias_12 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN6, self.BIAS4
        # Oscillator 13
        self.u1_13, self.u2_13, self.v1_13, self.v2_13, self.y1_13, self.y2_13, self.o_13, self.gain_13, self.bias_13 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN6, self.BIAS4

        # Set the joints to the initial bias positions - use slow angle setter
        initial_bias_angles = {
            'l_hip_y': self.bias_2,
            'r_hip_y': self.bias_3,
            'l_knee_y': self.bias_4,
            'r_knee_y': self.bias_5,
            'l_ankle_y': self.bias_6,
            'r_ankle_y': self.bias_7,
            'l_shoulder_y': self.bias_12,
            'r_shoulder_y': self.bias_13
        }
        self.robot_handle.set_angles_slow(target_angles=initial_bias_angles,
                                          duration=5.0,
                                          step=0.01)

        # Sleep for 2 seconds to let any oscillations to die down
        time.sleep(2.0)

        log('[ENV] Resetting monitoring thread timer')
        # Reset the timer of the monitor
        self.monitor_thread.reset_timer()

        # New variable for logging up time, since monitor thread is not accurate some times
        self.up_t = 0.0
示例#19
0
def oscillator_nw(position_vector, max_time=20.0, fitness_option=6):

    home_dir = os.path.expanduser('~')

    log('[OSC] Running oscillator_4.oscillator_nw')

    # Extract the elements from the position vector
    kf = position_vector[0]
    GAIN1 = position_vector[1]
    GAIN2 = position_vector[2]
    GAIN3 = position_vector[3]
    GAIN4 = position_vector[4]
    GAIN5 = position_vector[5]
    GAIN6 = position_vector[6]
    BIAS1 = position_vector[7]
    BIAS2 = position_vector[8]
    BIAS3 = position_vector[9]
    BIAS4 = position_vector[10]
    k_hip_y = position_vector[11]
    k_knee_y = position_vector[12]
    k_ankle_y = position_vector[13]
    k_hip_x = position_vector[14]
    k_ankle_x = position_vector[15]
    k_shoulder_y = position_vector[16]

    log('[OSC] Printing chromosome')
    log('[OSC] kf:{0} GAIN1:{1} GAIN2:{2} GAIN3:{3} GAIN4:{4} GAIN5:{5} GAIN6:{6} BIAS1:{7} BIAS2:{8} BIAS3:{9} BIAS4:{10} k_hip_y:{11} k_knee_y:{12} k_ankle_y:{13} k_hip_x:{14} k_ankle_x:{15} k_shoulder_y:{16}'
        .format(kf, GAIN1, GAIN2, GAIN3, GAIN4, GAIN5, GAIN6, BIAS1, BIAS2,
                BIAS3, BIAS4, k_hip_y, k_knee_y, k_ankle_y, k_hip_x, k_ankle_x,
                k_shoulder_y))
    log('[OSC] {0}'.format(position_vector))

    # Try to connect to VREP
    try_counter = 0
    try_max = 5
    robot_handle = None
    while robot_handle is None:
        try:
            log('[OSC] Trying to create robot handle (attempt: {0} of {1})'.
                format(try_counter, try_max))
            try_counter += 1
            robot_handle = Nico(
                sync_sleep_time=0.1,
                motor_config=os.path.join(
                    home_dir,
                    'computing/repositories/hierarchical_bipedal_controller/motor_configs/nico_humanoid_full_v1.json'
                ),
                vrep=True,
                vrep_host='127.0.0.1',
                vrep_port=19997,
                vrep_scene=os.path.join(
                    home_dir,
                    'computing/repositories/hierarchical_bipedal_controller/vrep_scenes/NICO-Simplified-July2017_standing_Foot_sensors_v4_no_graphs.ttt'
                ))
        except Exception, e:
            log('[OSC] Could not connect to VREP')
            log('[OSC] Error: {0}'.format(e.message))
            time.sleep(1.0)

        if try_counter > try_max:
            log('[OSC] Unable to create robot handle after {0} tries'.format(
                try_max))
            exit(1)
class Oscillator3Thread(threading.Thread):
    def __init__(self):

        # Call init of superclass
        threading.Thread.__init__(self)

        # Set the home directory
        self.home_dir = os.path.expanduser('~')

        # Set the name of the VREP object
        self.vrep_body_object = 'torso_11_respondable'

        # This is the lowest possible gain that can be set, the highest possible gain in 1.0 (no change)
        self.lowest_possible_gain = LOWEST_POSSIBLE_GAIN

        # Initially the 2 gain factors are set to 1.0 (no change)
        # These gain factors will be set by the RL algorithm (using the action method)
        self.gain_factor_l_hip_y = 1.0
        self.gain_factor_r_hip_y = 1.0

        # Variable to indicate if terminal state has been reached
        self.terminal = False

        # Variable to stop the thread
        self.stop_thread = False

        # Max time to walk (in seconds)
        self.max_walk_time = MAX_WALK_TIME

        # Variables from the best chromosome
        position_vector = BEST_CHROMOSOME

        self.kf = position_vector[0]
        self.GAIN1 = position_vector[1]
        self.GAIN2 = position_vector[2]
        self.GAIN3 = position_vector[3]
        self.GAIN4 = position_vector[4]
        self.GAIN5 = position_vector[5]
        self.GAIN6 = position_vector[6]
        self.BIAS1 = position_vector[7]
        self.BIAS2 = position_vector[8]
        self.BIAS3 = position_vector[9]
        self.BIAS4 = position_vector[10]
        self.k = position_vector[11]

        # Create the robot handle
        # Try to connect to VREP
        try_counter = 0
        try_max = 5
        self.robot_handle = None
        while self.robot_handle is None:

            # Close existing connections if any
            pypot.vrep.close_all_connections()

            try:
                log('[OSC] Trying to create robot handle (attempt: {0} of {1})'
                    .format(try_counter, try_max))
                try_counter += 1
                self.robot_handle = Nico(
                    sync_sleep_time=0.1,
                    motor_config=os.path.join(
                        self.home_dir,
                        'computing/repositories/hierarchical_bipedal_controller/motor_configs/nico_humanoid_full_v1.json'
                    ),
                    vrep=True,
                    vrep_host='127.0.0.1',
                    vrep_port=19997,
                    vrep_scene=os.path.join(
                        self.home_dir,
                        'computing/repositories/hierarchical_bipedal_controller/vrep_scenes/NICO-Simplified-July2017_standing_Foot_sensors_v4_no_graphs_with_path.ttt'
                    ))

            except Exception, e:
                log('[OSC] Could not connect to VREP')
                log('[OSC] Error: {0}'.format(e.message))
                time.sleep(1.0)

            if try_counter > try_max:
                log('[OSC] Unable to create robot handle after {0} tries'.
                    format(try_max))
                exit(1)

        if self.robot_handle is not None:
            log('[OSC] Successfully connected to VREP')

        # Start the monitoring thread
        self.monitor_thread = RobotMonitorThread(
            portnum=19998,
            objname='torso_11_respondable',
            height_threshold=0.3)
        self.monitor_thread.start()
        log('[OSC] Started monitoring thread')

        log('[OSC] Lowest possible gain: {}'.format(LOWEST_POSSIBLE_GAIN))

        # Wait 1s for the monitoring thread
        time.sleep(1.0)

        # Note the current position
        self.start_pos_x = self.monitor_thread.x
        self.start_pos_y = self.monitor_thread.y
        self.start_pos_z = self.monitor_thread.z

        # Strange error handler
        if self.start_pos_y is None:
            self.start_pos_x = 0.0
        if self.start_pos_y is None:
            self.start_pos_y = 0.0
        if self.start_pos_z is None:
            self.start_pos_z = 0.0

        # Set up the oscillator constants
        self.tau = 0.2800
        self.tau_prime = 0.4977
        self.beta = 2.5000
        self.w_0 = 2.2829
        self.u_e = 0.4111
        self.m1 = 1.0
        self.m2 = 1.0
        self.a = 1.0

        # Modify the time constants based on kf
        self.tau *= self.kf
        self.tau_prime *= self.kf

        # Step times
        self.lower_control_dt = 0.01

        # Set the oscillator variables
        # Oscillator 1 (pacemaker)
        self.u1_1, self.u2_1, self.v1_1, self.v2_1, self.y1_1, self.y2_1, self.o_1, self.gain_1, self.bias_1 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0
        # Oscillator 2
        self.u1_2, self.u2_2, self.v1_2, self.v2_2, self.y1_2, self.y2_2, self.o_2, self.gain_2, self.bias_2 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN1, self.BIAS1
        # Oscillator 3
        self.u1_3, self.u2_3, self.v1_3, self.v2_3, self.y1_3, self.y2_3, self.o_3, self.gain_3, self.bias_3 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN1, self.BIAS1
        # Oscillator 4
        self.u1_4, self.u2_4, self.v1_4, self.v2_4, self.y1_4, self.y2_4, self.o_4, self.gain_4, self.bias_4 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN3, self.BIAS2
        # Oscillator 5
        self.u1_5, self.u2_5, self.v1_5, self.v2_5, self.y1_5, self.y2_5, self.o_5, self.gain_5, self.bias_5 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN3, self.BIAS2
        # Oscillator 6
        self.u1_6, self.u2_6, self.v1_6, self.v2_6, self.y1_6, self.y2_6, self.o_6, self.gain_6, self.bias_6 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN2, self.BIAS3
        # Oscillator 7
        self.u1_7, self.u2_7, self.v1_7, self.v2_7, self.y1_7, self.y2_7, self.o_7, self.gain_7, self.bias_7 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN2, self.BIAS3
        # Oscillator 8
        self.u1_8, self.u2_8, self.v1_8, self.v2_8, self.y1_8, self.y2_8, self.o_8, self.gain_8, self.bias_8 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN4, 0.0
        # Oscillator 9
        self.u1_9, self.u2_9, self.v1_9, self.v2_9, self.y1_9, self.y2_9, self.o_9, self.gain_9, self.bias_9 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN5, 0.0
        # Oscillator 10
        self.u1_10, self.u2_10, self.v1_10, self.v2_10, self.y1_10, self.y2_10, self.o_10, self.gain_10, self.bias_10 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN4, 0.0
        # Oscillator 11
        self.u1_11, self.u2_11, self.v1_11, self.v2_11, self.y1_11, self.y2_11, self.o_11, self.gain_11, self.bias_11 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN5, 0.0
        # Oscillator 12
        self.u1_12, self.u2_12, self.v1_12, self.v2_12, self.y1_12, self.y2_12, self.o_12, self.gain_12, self.bias_12 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN6, self.BIAS4
        # Oscillator 13
        self.u1_13, self.u2_13, self.v1_13, self.v2_13, self.y1_13, self.y2_13, self.o_13, self.gain_13, self.bias_13 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN6, self.BIAS4

        # Set the joints to the initial bias positions - use slow angle setter
        initial_bias_angles = {
            'l_hip_y': self.bias_2,
            'r_hip_y': self.bias_3,
            'l_knee_y': self.bias_4,
            'r_knee_y': self.bias_5,
            'l_ankle_y': self.bias_6,
            'r_ankle_y': self.bias_7,
            'l_shoulder_y': self.bias_12,
            'r_shoulder_y': self.bias_13
        }
        self.robot_handle.set_angles_slow(target_angles=initial_bias_angles,
                                          duration=5.0,
                                          step=0.01)

        # Sleep for 2 seconds to let any oscillations to die down
        time.sleep(2.0)

        # Reset the timer of the monitor
        self.monitor_thread.reset_timer()

        # New variable for logging up time, since monitor thread is not accurate some times
        self.up_t = 0.0