def gym_init(self): log('[ENV] gym_init called') # Gym initialization # Actions - [gain_factor_l_hip_y, gain_factor_r_hip_y] # States - [torso_alpha, torso_beta, torso_gamma, d_torso_alpha, d_torso_beta, d_torso_gamma, torso_x, torso_y, d_torso_x, d_torso_y] # Set the max and min gain factor # The gain factor is multiplied with the joint gain self.gain_factor_max = 1.0 self.gain_factor_min = 1.0 # Initialize the gain factors self.gain_factor_l_hip_y = 1.0 self.gain_factor_r_hip_y = 1.0 # Initialize the action and observation spaces obs = np.array([np.inf] * 10) self.action_space = spaces.Box(low=np.array([0.75, 0.75]), high=np.array([1.0, 1.0])) self.observation_space = spaces.Box(-obs, obs) # Variable for the number of steps in the episode self.step_counter = 0
def _step(self, actions): # Set the actions self.oscillator_thread.self_action(actions) # Time for the actions to take effect time.sleep(1.0) # Make observation self._self_observe() # Calculate the reward # Since the robot starts at x=0,y=0 and faces the x-direction, the reward is calculated as -1.0*abs(y position) objpos = self.oscillator_thread.monitor_thread.objpos fallen = self.oscillator_thread.monitor_thread.fallen torso_orientation = self.oscillator_thread.monitor_thread.torso_euler_angles forward_x = objpos[0] deviation = objpos[1] torso_gamma = torso_orientation[2] reward = ALPHA * (-abs(deviation)) + BETA * forward_x + THETA * ( -abs(torso_gamma)) log('[ENV] Deviation: {0} X-distance: {1} Torso-gamma: {2}'.format( deviation, forward_x, torso_gamma)) # Increment the step counter self.step_counter += 1 # Large negative reward if the robot falls if fallen: reward -= 100.0 return self.observation, reward, self.oscillator_thread.terminal, {}
def _close(self): log('[ENV] Stopping the environment') self.oscillator_thread.monitor_thread.stop() self.oscillator_thread.monitor_thread.join() # Close the VREP connection self.oscillator_thread.robot_handle.cleanup() self.oscillator_thread.stop() self.oscillator_thread.join()
def destroy(self): log('[ENV] destroy called') # Stop the monitoring thread self.monitor_thread.stop() # Close the VREP connection self.robot_handle.cleanup()
def _reset(self): log('[ENV] _reset called') self.monitor_thread.stop() self.matsuoka_init() self.gym_init() self._self_observe() # Set the observation return self.observation
def force_sensor_callback(data): global prev_z_force, curr_z_force # Internal state variables of the pacemaker oscillator used as global variables global u1_1, u2_1, v1_1, v2_1, y1_1, y2_1, o_1, gain_1, bias_1 # Retrieve the force plot_data as a string str_force_data = data.data # Convert into a numpy 2D array (8 rows, 3 columns) # Each row for 1 sensor (l1, l2, l3, l4, r1, r2, r3, r4) # Each row has x, y and z force values arr_force_data = np.fromstring(str_force_data.replace('{', '').replace('}', ''), sep=',').reshape((8, 3)) # The left foot sensor values (force) l1 = arr_force_data[0] # toe l2 = arr_force_data[1] # toe l3 = arr_force_data[2] # heel l4 = arr_force_data[3] # heel # The right foot sensor values (force) r1 = arr_force_data[4] # toe r2 = arr_force_data[5] # toe r3 = arr_force_data[6] # heel r4 = arr_force_data[7] # heel # Average z forces for left foot l_toe_avg_z = (l1[2] + l2[2]) / 2.0 l_heel_avg_z = (l3[2] + l4[2]) / 2.0 l_avg_z = (l1[2] + l2[2] + l3[2] + l4[2]) / 4.0 # Average z forces for right foot r_toe_avg_z = (r1[2] + r2[2]) / 2.0 r_heel_avg_z = (r3[2] + r4[2]) / 2.0 r_avg_z = (r1[2] + r2[2] + r3[2] + r4[2]) / 4.0 # Reset the phase only when the force goes from below the threshold to above threshold curr_z_force = r_heel_avg_z if prev_z_force < force_threshold <= curr_z_force: # When r_heel_avg_z goes above force_threshold reset the phase # Sleep for 0.2s for the change to take effect u1_1, u2_1, v1_1, v2_1, y1_1, y2_1, o_1, gain_1, bias_1 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0 log('[ROS] **** Resetting phase ****') log('[ROS] prev_z_force: {0} curr_z_force: {1}'.format( prev_z_force, curr_z_force)) time.sleep(0.1) prev_z_force = curr_z_force
def _reset(self): log('[ENV] Resetting the environment') self.oscillator_thread.monitor_thread.stop() self.oscillator_thread.monitor_thread.join() # Close the VREP connection self.oscillator_thread.robot_handle.cleanup() self.oscillator_thread.stop() self.oscillator_thread.join() self.__init__() self._self_observe() # Set the observation return self.observation
def stop(self): """ Sets the flag which will stop the thread :return: None """ # Flag to stop the monitoring thread self.stop_flag = True # Close the monitoring vrep connection self.vrepio_obj.close() # Calculate the average foot step size self.calc_avg_footstep() # Calculate the variance of the torso orientation self.calc_var_orientation() # Store the results GaitEvalResult.fallen = self.fallen GaitEvalResult.up_time = self.up_time GaitEvalResult.x = self.x GaitEvalResult.y = abs(self.y) GaitEvalResult.avg_footstep = self.avg_footstep GaitEvalResult.var_torso_orientation_alpha = self.var_torso_orientation_alpha GaitEvalResult.var_torso_orientation_beta = self.var_torso_orientation_beta GaitEvalResult.var_torso_orientation_gamma = self.var_torso_orientation_gamma print 'Fall:{0}, ' \ 'Up_time:{1}, ' \ 'X-distance:{2}, ' \ 'Deviation (mag):{3}, ' \ 'Avg. step length:{4}, ' \ 'Torso orientation variance Alpha-Beta-Gamma:{5}|{6}|{7}'.format(self.fallen, self.up_time, self.x, abs(self.y), self.avg_footstep, self.var_torso_orientation_alpha, self.var_torso_orientation_beta, self.var_torso_orientation_gamma) log('[MON] Monitor finished')
def __init__(self): log('[ENV] Initilializing environment') # Gym initialization # Actions - [gain_factor_l_hip_y, gain_factor_r_hip_y] # States - [torso_alpha, torso_beta, torso_gamma, d_torso_alpha, d_torso_beta, d_torso_gamma, torso_x, torso_y, d_torso_x, d_torso_y] # Set the max and min gain factor # The gain factor is multiplied with the joint gain self.gain_factor_max = 1.0 self.gain_factor_min = 1.0 # Initialize the gain factors self.gain_factor_l_hip_y = 1.0 self.gain_factor_r_hip_y = 1.0 # Initialize the action and observation spaces obs = np.array([np.inf] * 10) self.action_space = spaces.Box(low=np.array([0.75, 0.75]), high=np.array([1.0, 1.0])) self.observation_space = spaces.Box(-obs, obs) # Initilize the observation variables self.observation = None # Variable for the number of steps in the episode self.step_counter = 0 # Set the oscillator thread # This will initialize the VREP scene and set the robot to the starting position self.oscillator_thread = Oscillator3Thread() # Start the oscillator thread # This will start the matsuoka walk self.oscillator_thread.start()
def matsuoka_init(self): # Set the home directory self.home_dir = os.path.expanduser('~') log('[ENV] matsuoka_init called') # Max time to walk (in seconds) self.max_walk_time = 20 # Variables from the best chromosome position_vector = [ 0.7461913734531209, 0.8422944031253159, 0.07043758116681641, 0.14236621222553963, 0.48893497409925746, 0.5980055418720059, 0.740811806645801, -0.11618361090424223, 0.492832184960149, -0.2949145038394889, 0.175450703085948, -0.3419733470484183 ] self.kf = position_vector[0] self.GAIN1 = position_vector[1] self.GAIN2 = position_vector[2] self.GAIN3 = position_vector[3] self.GAIN4 = position_vector[4] self.GAIN5 = position_vector[5] self.GAIN6 = position_vector[6] self.BIAS1 = position_vector[7] self.BIAS2 = position_vector[8] self.BIAS3 = position_vector[9] self.BIAS4 = position_vector[10] self.k = position_vector[11] # Create the robot handle # Try to connect to VREP try_counter = 0 try_max = 5 self.robot_handle = None while self.robot_handle is None: # Close existing connections if any pypot.vrep.close_all_connections() try: log('[ENV] Trying to create robot handle (attempt: {0} of {1})' .format(try_counter, try_max)) try_counter += 1 self.robot_handle = Nico( sync_sleep_time=0.1, motor_config=os.path.join( self.home_dir, 'computing/repositories/hierarchical_bipedal_controller/motor_configs/nico_humanoid_full_v1.json' ), vrep=True, vrep_host='127.0.0.1', vrep_port=19997, vrep_scene=os.path.join( self.home_dir, 'computing/repositories/hierarchical_bipedal_controller/vrep_scenes/NICO-Simplified-July2017_standing_Foot_sensors_v4_no_graphs.ttt' )) except Exception, e: log('[ENV] Could not connect to VREP') log('[ENV] Error: {0}'.format(e.message)) time.sleep(1.0) if try_counter > try_max: log('[ENV] Unable to create robot handle after {0} tries'. format(try_max)) exit(1)
def _step(self, actions): # Run 10 iterations of matsuoka loop followed by 1 iteration of checking torso # Make observation, take action, calculate reward # Scale the actions so that they are within range actions = self.scale_actions(actions) log('Scaled actions: {}'.format(actions)) # Set the gain factors self.gain_factor_l_hip_y = actions[0] self.gain_factor_r_hip_y = actions[1] #for t in np.arange(0.0, max_time, dt): for i in range(10): # Increment the up time variable self.up_t += self.lower_control_dt # Calculate the current angles of the l and r saggital hip joints self.feedback_angles = self.robot_handle.get_angles( ['l_hip_y', 'r_hip_y']) # Calculate next state of oscillator 1 (pacemaker) self.f1_1, self.f2_1 = 0.0, 0.0 self.s1_1, self.s2_1 = 0.0, 0.0 self.u1_1, self.u2_1, self.v1_1, self.v2_1, self.y1_1, self.y2_1, self.o_1 = self.oscillator_next( u1=self.u1_1, u2=self.u2_1, v1=self.v1_1, v2=self.v2_1, y1=self.y1_1, y2=self.y2_1, f1=self.f1_1, f2=self.f2_1, s1=self.s1_1, s2=self.s2_1, bias=self.bias_1, gain=self.gain_1, dt=self.lower_control_dt) # Calculate next state of oscillator 2 # w_ij -> j=1 (oscillator 1) is master, i=2 (oscillator 2) is slave # Gain factor set by the higher level control is set here self.w_21 = 1.0 self.f1_2, self.f2_2 = self.k * self.feedback_angles[ 'l_hip_y'], -self.k * self.feedback_angles['l_hip_y'] self.s1_2, self.s2_2 = self.w_21 * self.u1_1, self.w_21 * self.u2_1 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_2, self.u2_2, self.v1_2, self.v2_2, self.y1_2, self.y2_2, self.o_2 = self.oscillator_next( u1=self.u1_2, u2=self.u2_2, v1=self.v1_2, v2=self.v2_2, y1=self.y1_2, y2=self.y2_2, f1=self.f1_2, f2=self.f2_2, s1=self.s1_2, s2=self.s2_2, bias=self.bias_2, gain=self.gain_factor_l_hip_y * self.gain_2, dt=self.lower_control_dt) # Calculate next state of oscillator 3 # w_ij -> j=1 (oscillator 1) is master, i=3 (oscillator 3) is slave # Gain factor set by the higher level control is set here self.w_31 = -1.0 self.f1_3, self.f2_3 = self.k * self.feedback_angles[ 'r_hip_y'], -self.k * self.feedback_angles['r_hip_y'] self.s1_3, self.s2_3 = self.w_31 * self.u1_1, self.w_31 * self.u2_1 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_3, self.u2_3, self.v1_3, self.v2_3, self.y1_3, self.y2_3, self.o_3 = self.oscillator_next( u1=self.u1_3, u2=self.u2_3, v1=self.v1_3, v2=self.v2_3, y1=self.y1_3, y2=self.y2_3, f1=self.f1_3, f2=self.f2_3, s1=self.s1_3, s2=self.s2_3, bias=self.bias_3, gain=self.gain_factor_r_hip_y * self.gain_3, dt=self.lower_control_dt) # Calculate next state of oscillator 4 # w_ij -> j=2 (oscillator 2) is master, i=4 (oscillator 4) is slave self.w_42 = -1.0 self.f1_4, self.f2_4 = 0.0, 0.0 self.s1_4, self.s2_4 = self.w_42 * self.u1_2, self.w_42 * self.u2_2 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_4, self.u2_4, self.v1_4, self.v2_4, self.y1_4, self.y2_4, self.o_4 = self.oscillator_next( u1=self.u1_4, u2=self.u2_4, v1=self.v1_4, v2=self.v2_4, y1=self.y1_4, y2=self.y2_4, f1=self.f1_4, f2=self.f2_4, s1=self.s1_4, s2=self.s2_4, bias=self.bias_4, gain=self.gain_4, dt=self.lower_control_dt) # Calculate next state of oscillator 5 # w_ij -> j=3 (oscillator 3) is master, i=5 (oscillator 5) is slave self.w_53 = -1.0 self.f1_5, self.f2_5 = 0.0, 0.0 self.s1_5, self.s2_5 = self.w_53 * self.u1_3, self.w_53 * self.u2_3 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_5, self.u2_5, self.v1_5, self.v2_5, self.y1_5, self.y2_5, self.o_5 = self.oscillator_next( u1=self.u1_5, u2=self.u2_5, v1=self.v1_5, v2=self.v2_5, y1=self.y1_5, y2=self.y2_5, f1=self.f1_5, f2=self.f2_5, s1=self.s1_5, s2=self.s2_5, bias=self.bias_5, gain=self.gain_5, dt=self.lower_control_dt) # Calculate next state of oscillator 6 # w_ij -> j=2 (oscillator 2) is master, i=6 (oscillator 6) is slave self.w_62 = -1.0 self.f1_6, self.f2_6 = 0.0, 0.0 self.s1_6, self.s2_6 = self.w_62 * self.u1_2, self.w_62 * self.u2_2 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_6, self.u2_6, self.v1_6, self.v2_6, self.y1_6, self.y2_6, self.o_6 = self.oscillator_next( u1=self.u1_6, u2=self.u2_6, v1=self.v1_6, v2=self.v2_6, y1=self.y1_6, y2=self.y2_6, f1=self.f1_6, f2=self.f2_6, s1=self.s1_6, s2=self.s2_6, bias=self.bias_6, gain=self.gain_6, dt=self.lower_control_dt) # Calculate next state of oscillator 7 # w_ij -> j=3 (oscillator 3) is master, i=7 (oscillator 7) is slave self.w_73 = -1.0 self.f1_7, self.f2_7 = 0.0, 0.0 self.s1_7, self.s2_7 = self.w_73 * self.u1_3, self.w_73 * self.u2_3 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_7, self.u2_7, self.v1_7, self.v2_7, self.y1_7, self.y2_7, self.o_7 = self.oscillator_next( u1=self.u1_7, u2=self.u2_7, v1=self.v1_7, v2=self.v2_7, y1=self.y1_7, y2=self.y2_7, f1=self.f1_7, f2=self.f2_7, s1=self.s1_7, s2=self.s2_7, bias=self.bias_7, gain=self.gain_7, dt=self.lower_control_dt) # Calculate next state of oscillator 8 # w_ij -> j=1 (oscillator 1) is master, i=8 (oscillator 8) is slave self.w_81 = 1.0 self.f1_8, self.f2_8 = 0.0, 0.0 self.s1_8, self.s2_8 = self.w_81 * self.u1_1, self.w_81 * self.u2_1 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_8, self.u2_8, self.v1_8, self.v2_8, self.y1_8, self.y2_8, self.o_8 = self.oscillator_next( u1=self.u1_8, u2=self.u2_8, v1=self.v1_8, v2=self.v2_8, y1=self.y1_8, y2=self.y2_8, f1=self.f1_8, f2=self.f2_8, s1=self.s1_8, s2=self.s2_8, bias=self.bias_8, gain=self.gain_8, dt=self.lower_control_dt) # Calculate next state of oscillator 9 # w_ij -> j=8 (oscillator 8) is master, i=9 (oscillator 9) is slave self.w_98 = -1.0 self.f1_9, self.f2_9 = 0.0, 0.0 self.s1_9, self.s2_9 = self.w_98 * self.u1_8, self.w_98 * self.u2_8 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_9, self.u2_9, self.v1_9, self.v2_9, self.y1_9, self.y2_9, self.o_9 = self.oscillator_next( u1=self.u1_9, u2=self.u2_9, v1=self.v1_9, v2=self.v2_9, y1=self.y1_9, y2=self.y2_9, f1=self.f1_9, f2=self.f2_9, s1=self.s1_9, s2=self.s2_9, bias=self.bias_9, gain=self.gain_9, dt=self.lower_control_dt) # Calculate next state of oscillator 10 # w_ij -> j=1 (oscillator 1) is master, i=10 (oscillator 10) is slave self.w_101 = 1.0 self.f1_10, self.f2_10 = 0.0, 0.0 self.s1_10, self.s2_10 = self.w_101 * self.u1_1, self.w_101 * self.u2_1 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_10, self.u2_10, self.v1_10, self.v2_10, self.y1_10, self.y2_10, self.o_10 = self.oscillator_next( u1=self.u1_10, u2=self.u2_10, v1=self.v1_10, v2=self.v2_10, y1=self.y1_10, y2=self.y2_10, f1=self.f1_10, f2=self.f2_10, s1=self.s1_10, s2=self.s2_10, bias=self.bias_10, gain=self.gain_10, dt=self.lower_control_dt) # Calculate next state of oscillator 11 # w_ij -> j=10 (oscillator 10) is master, i=11 (oscillator 11) is slave self.w_1110 = -1.0 self.f1_11, self.f2_11 = 0.0, 0.0 self.s1_11, self.s2_11 = self.w_1110 * self.u1_10, self.w_1110 * self.u2_10 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_11, self.u2_11, self.v1_11, self.v2_11, self.y1_11, self.y2_11, self.o_11 = self.oscillator_next( u1=self.u1_11, u2=self.u2_11, v1=self.v1_11, v2=self.v2_11, y1=self.y1_11, y2=self.y2_11, f1=self.f1_11, f2=self.f2_11, s1=self.s1_11, s2=self.s2_11, bias=self.bias_11, gain=self.gain_11, dt=self.lower_control_dt) # Calculate next state of oscillator 12 # w_ij -> j=1 (oscillator 1) is master, i=12 (oscillator 12) is slave self.w_121 = -1.0 self.f1_12, self.f2_12 = 0.0, 0.0 self.s1_12, self.s2_12 = self.w_121 * self.u1_1, self.w_121 * self.u2_1 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_12, self.u2_12, self.v1_12, self.v2_12, self.y1_12, self.y2_12, self.o_12 = self.oscillator_next( u1=self.u1_12, u2=self.u2_12, v1=self.v1_12, v2=self.v2_12, y1=self.y1_12, y2=self.y2_12, f1=self.f1_12, f2=self.f2_12, s1=self.s1_12, s2=self.s2_12, bias=self.bias_12, gain=self.gain_12, dt=self.lower_control_dt) # Calculate next state of oscillator 13 # w_ij -> j=1 (oscillator 1) is master, i=13 (oscillator 13) is slave self.w_131 = 1.0 self.f1_13, self.f2_13 = 0.0, 0.0 self.s1_13, self.s2_13 = self.w_131 * self.u1_1, self.w_131 * self.u2_1 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_13, self.u2_13, self.v1_13, self.v2_13, self.y1_13, self.y2_13, self.o_13 = self.oscillator_next( u1=self.u1_13, u2=self.u2_13, v1=self.v1_13, v2=self.v2_13, y1=self.y1_13, y2=self.y2_13, f1=self.f1_13, f2=self.f2_13, s1=self.s1_13, s2=self.s2_13, bias=self.bias_13, gain=self.gain_13, dt=self.lower_control_dt) # Set the joint positions self.current_angles = { 'l_hip_y': self.o_2, 'r_hip_y': self.o_3, 'l_knee_y': self.o_4, 'r_knee_y': self.o_5, 'l_ankle_y': self.o_6, 'r_ankle_y': self.o_7, 'l_hip_x': self.o_8, 'l_ankle_x': self.o_9, 'r_hip_x': self.o_10, 'r_ankle_x': self.o_11, 'l_shoulder_y': self.o_12, 'r_shoulder_y': self.o_13 } self.robot_handle.set_angles(self.current_angles) time.sleep(self.lower_control_dt) # Check if the robot has fallen if self.monitor_thread.fallen: break # Make observation self._self_observe() # Calculate the reward # Since the robot starts at x=0,y=0 and faces the x-direction, the reward is calculated as -1.0*abs(y position) reward = -1.0 * abs(self.monitor_thread.y) # Increment the step counter self.step_counter += 1 # Check if the current state is terminal terminal = False if self.monitor_thread.fallen or not (self.up_t < self.max_walk_time): terminal = True # Large negative reward if the robot falls if self.monitor_thread.fallen: reward -= 100.0 # No need to put a sleep here, it is taken care of inside the matsuoka loop return self.observation, reward, terminal, {}
def __init__(self): # Call init of superclass threading.Thread.__init__(self) # Set the home directory self.home_dir = os.path.expanduser('~') # Set the name of the VREP object self.vrep_body_object = 'torso_11_respondable' # This is the lowest possible gain that can be set, the highest possible gain in 1.0 (no change) self.lowest_possible_gain = LOWEST_POSSIBLE_GAIN # Initially the 2 gain factors are set to 1.0 (no change) # These gain factors will be set by the RL algorithm (using the action method) self.gain_factor_l_hip_y = 1.0 self.gain_factor_r_hip_y = 1.0 # Variable to indicate if terminal state has been reached self.terminal = False # Variable to stop the thread self.stop_thread = False # Max time to walk (in seconds) self.max_walk_time = MAX_WALK_TIME # Variables from the best chromosome position_vector = BEST_CHROMOSOME self.kf = position_vector[0] self.GAIN1 = position_vector[1] self.GAIN2 = position_vector[2] self.GAIN3 = position_vector[3] self.GAIN4 = position_vector[4] self.GAIN5 = position_vector[5] self.GAIN6 = position_vector[6] self.BIAS1 = position_vector[7] self.BIAS2 = position_vector[8] self.BIAS3 = position_vector[9] self.BIAS4 = position_vector[10] self.k = position_vector[11] # Create the robot handle # Try to connect to VREP try_counter = 0 try_max = 5 self.robot_handle = None while self.robot_handle is None: # Close existing connections if any pypot.vrep.close_all_connections() try: log('[OSC] Trying to create robot handle (attempt: {0} of {1})' .format(try_counter, try_max)) try_counter += 1 self.robot_handle = Nico( sync_sleep_time=0.1, motor_config=os.path.join( self.home_dir, 'computing/repositories/hierarchical_bipedal_controller/motor_configs/nico_humanoid_full_v1.json' ), vrep=True, vrep_host='127.0.0.1', vrep_port=19997, vrep_scene=os.path.join( self.home_dir, 'computing/repositories/hierarchical_bipedal_controller/vrep_scenes/NICO-Simplified-July2017_standing_Foot_sensors_v4_no_graphs_with_path.ttt' )) except Exception, e: log('[OSC] Could not connect to VREP') log('[OSC] Error: {0}'.format(e.message)) time.sleep(1.0) if try_counter > try_max: log('[OSC] Unable to create robot handle after {0} tries'. format(try_max)) exit(1)
vrep_scene=os.path.join( home_dir, 'computing/repositories/hierarchical_bipedal_controller/vrep_scenes/NICO-Simplified-July2017_standing_Foot_sensors_v4_no_graphs.ttt' )) except Exception, e: log('[OSC] Could not connect to VREP') log('[OSC] Error: {0}'.format(e.message)) time.sleep(1.0) if try_counter > try_max: log('[OSC] Unable to create robot handle after {0} tries'.format( try_max)) exit(1) if robot_handle is not None: log('[OSC] Successfully connected to VREP') # Start the monitoring thread monitor_thread = RobotMonitorThread(portnum=19998, objname='torso_11_respondable', height_threshold=0.3) monitor_thread.start() log('[OSC] Started monitoring thread') # Wait 1s for the monitoring thread time.sleep(1.0) # Note the current position start_pos_x = monitor_thread.x start_pos_y = monitor_thread.y start_pos_z = monitor_thread.z
vrep_scene=os.path.join( home_dir, 'computing/repositories/hierarchical_bipedal_controller/vrep_scenes/NICO-Simplified-July2017_standing_Foot_sensors_v4_no_graphs.ttt' )) except Exception, e: log('[OSC] Could not connect to VREP') log('[OSC] Error: {0}'.format(e.message)) time.sleep(1.0) if try_counter > try_max: log('[OSC] Unable to create robot handle after {0} tries'.format( try_max)) exit(1) if robot_handle is not None: log('[OSC] Successfully connected to VREP') # Start the monitoring thread monitor_thread = RobotMonitorThread(portnum=19998, objname='torso_11_respondable', height_threshold=0.3) monitor_thread.start() log('[OSC] Started monitoring thread') # Note the current position start_pos_x = monitor_thread.x start_pos_y = monitor_thread.y start_pos_z = monitor_thread.z # Wait 1s for the monitoring thread time.sleep(1.0)
def run(self): for t in np.arange(0.0, self.max_walk_time, self.lower_control_dt): # Increment the up time variable self.up_t += self.lower_control_dt # Calculate the current angles of the l and r saggital hip joints self.feedback_angles = self.robot_handle.get_angles( ['l_hip_y', 'r_hip_y']) # For verification - print every 100 iterations #if t in np.arange(0.0, self.max_walk_time, self.lower_control_dt*100): #log('[OSC] Feedback angles: {}'.format(self.feedback_angles)) #log('[OSC] Gain values: {}'.format((self.gain_factor_l_hip_y, self.gain_factor_r_hip_y))) # Calculate next state of oscillator 1 (pacemaker) self.f1_1, self.f2_1 = 0.0, 0.0 self.s1_1, self.s2_1 = 0.0, 0.0 self.u1_1, self.u2_1, self.v1_1, self.v2_1, self.y1_1, self.y2_1, self.o_1 = self.oscillator_next( u1=self.u1_1, u2=self.u2_1, v1=self.v1_1, v2=self.v2_1, y1=self.y1_1, y2=self.y2_1, f1=self.f1_1, f2=self.f2_1, s1=self.s1_1, s2=self.s2_1, bias=self.bias_1, gain=self.gain_1, dt=self.lower_control_dt) # Calculate next state of oscillator 2 # w_ij -> j=1 (oscillator 1) is master, i=2 (oscillator 2) is slave # Gain factor set by the higher level control is set here self.w_21 = 1.0 self.f1_2, self.f2_2 = self.k * self.feedback_angles[ 'l_hip_y'], -self.k * self.feedback_angles['l_hip_y'] self.s1_2, self.s2_2 = self.w_21 * self.u1_1, self.w_21 * self.u2_1 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_2, self.u2_2, self.v1_2, self.v2_2, self.y1_2, self.y2_2, self.o_2 = self.oscillator_next( u1=self.u1_2, u2=self.u2_2, v1=self.v1_2, v2=self.v2_2, y1=self.y1_2, y2=self.y2_2, f1=self.f1_2, f2=self.f2_2, s1=self.s1_2, s2=self.s2_2, bias=self.bias_2, gain=self.gain_factor_l_hip_y * self.gain_2, dt=self.lower_control_dt) # Calculate next state of oscillator 3 # w_ij -> j=1 (oscillator 1) is master, i=3 (oscillator 3) is slave # Gain factor set by the higher level control is set here self.w_31 = -1.0 self.f1_3, self.f2_3 = self.k * self.feedback_angles[ 'r_hip_y'], -self.k * self.feedback_angles['r_hip_y'] self.s1_3, self.s2_3 = self.w_31 * self.u1_1, self.w_31 * self.u2_1 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_3, self.u2_3, self.v1_3, self.v2_3, self.y1_3, self.y2_3, self.o_3 = self.oscillator_next( u1=self.u1_3, u2=self.u2_3, v1=self.v1_3, v2=self.v2_3, y1=self.y1_3, y2=self.y2_3, f1=self.f1_3, f2=self.f2_3, s1=self.s1_3, s2=self.s2_3, bias=self.bias_3, gain=self.gain_factor_r_hip_y * self.gain_3, dt=self.lower_control_dt) # Calculate next state of oscillator 4 # w_ij -> j=2 (oscillator 2) is master, i=4 (oscillator 4) is slave self.w_42 = -1.0 self.f1_4, self.f2_4 = 0.0, 0.0 self.s1_4, self.s2_4 = self.w_42 * self.u1_2, self.w_42 * self.u2_2 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_4, self.u2_4, self.v1_4, self.v2_4, self.y1_4, self.y2_4, self.o_4 = self.oscillator_next( u1=self.u1_4, u2=self.u2_4, v1=self.v1_4, v2=self.v2_4, y1=self.y1_4, y2=self.y2_4, f1=self.f1_4, f2=self.f2_4, s1=self.s1_4, s2=self.s2_4, bias=self.bias_4, gain=self.gain_4, dt=self.lower_control_dt) # Calculate next state of oscillator 5 # w_ij -> j=3 (oscillator 3) is master, i=5 (oscillator 5) is slave self.w_53 = -1.0 self.f1_5, self.f2_5 = 0.0, 0.0 self.s1_5, self.s2_5 = self.w_53 * self.u1_3, self.w_53 * self.u2_3 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_5, self.u2_5, self.v1_5, self.v2_5, self.y1_5, self.y2_5, self.o_5 = self.oscillator_next( u1=self.u1_5, u2=self.u2_5, v1=self.v1_5, v2=self.v2_5, y1=self.y1_5, y2=self.y2_5, f1=self.f1_5, f2=self.f2_5, s1=self.s1_5, s2=self.s2_5, bias=self.bias_5, gain=self.gain_5, dt=self.lower_control_dt) # Calculate next state of oscillator 6 # w_ij -> j=2 (oscillator 2) is master, i=6 (oscillator 6) is slave self.w_62 = -1.0 self.f1_6, self.f2_6 = 0.0, 0.0 self.s1_6, self.s2_6 = self.w_62 * self.u1_2, self.w_62 * self.u2_2 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_6, self.u2_6, self.v1_6, self.v2_6, self.y1_6, self.y2_6, self.o_6 = self.oscillator_next( u1=self.u1_6, u2=self.u2_6, v1=self.v1_6, v2=self.v2_6, y1=self.y1_6, y2=self.y2_6, f1=self.f1_6, f2=self.f2_6, s1=self.s1_6, s2=self.s2_6, bias=self.bias_6, gain=self.gain_6, dt=self.lower_control_dt) # Calculate next state of oscillator 7 # w_ij -> j=3 (oscillator 3) is master, i=7 (oscillator 7) is slave self.w_73 = -1.0 self.f1_7, self.f2_7 = 0.0, 0.0 self.s1_7, self.s2_7 = self.w_73 * self.u1_3, self.w_73 * self.u2_3 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_7, self.u2_7, self.v1_7, self.v2_7, self.y1_7, self.y2_7, self.o_7 = self.oscillator_next( u1=self.u1_7, u2=self.u2_7, v1=self.v1_7, v2=self.v2_7, y1=self.y1_7, y2=self.y2_7, f1=self.f1_7, f2=self.f2_7, s1=self.s1_7, s2=self.s2_7, bias=self.bias_7, gain=self.gain_7, dt=self.lower_control_dt) # Calculate next state of oscillator 8 # w_ij -> j=1 (oscillator 1) is master, i=8 (oscillator 8) is slave self.w_81 = 1.0 self.f1_8, self.f2_8 = 0.0, 0.0 self.s1_8, self.s2_8 = self.w_81 * self.u1_1, self.w_81 * self.u2_1 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_8, self.u2_8, self.v1_8, self.v2_8, self.y1_8, self.y2_8, self.o_8 = self.oscillator_next( u1=self.u1_8, u2=self.u2_8, v1=self.v1_8, v2=self.v2_8, y1=self.y1_8, y2=self.y2_8, f1=self.f1_8, f2=self.f2_8, s1=self.s1_8, s2=self.s2_8, bias=self.bias_8, gain=self.gain_8, dt=self.lower_control_dt) # Calculate next state of oscillator 9 # w_ij -> j=8 (oscillator 8) is master, i=9 (oscillator 9) is slave self.w_98 = -1.0 self.f1_9, self.f2_9 = 0.0, 0.0 self.s1_9, self.s2_9 = self.w_98 * self.u1_8, self.w_98 * self.u2_8 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_9, self.u2_9, self.v1_9, self.v2_9, self.y1_9, self.y2_9, self.o_9 = self.oscillator_next( u1=self.u1_9, u2=self.u2_9, v1=self.v1_9, v2=self.v2_9, y1=self.y1_9, y2=self.y2_9, f1=self.f1_9, f2=self.f2_9, s1=self.s1_9, s2=self.s2_9, bias=self.bias_9, gain=self.gain_9, dt=self.lower_control_dt) # Calculate next state of oscillator 10 # w_ij -> j=1 (oscillator 1) is master, i=10 (oscillator 10) is slave self.w_101 = 1.0 self.f1_10, self.f2_10 = 0.0, 0.0 self.s1_10, self.s2_10 = self.w_101 * self.u1_1, self.w_101 * self.u2_1 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_10, self.u2_10, self.v1_10, self.v2_10, self.y1_10, self.y2_10, self.o_10 = self.oscillator_next( u1=self.u1_10, u2=self.u2_10, v1=self.v1_10, v2=self.v2_10, y1=self.y1_10, y2=self.y2_10, f1=self.f1_10, f2=self.f2_10, s1=self.s1_10, s2=self.s2_10, bias=self.bias_10, gain=self.gain_10, dt=self.lower_control_dt) # Calculate next state of oscillator 11 # w_ij -> j=10 (oscillator 10) is master, i=11 (oscillator 11) is slave self.w_1110 = -1.0 self.f1_11, self.f2_11 = 0.0, 0.0 self.s1_11, self.s2_11 = self.w_1110 * self.u1_10, self.w_1110 * self.u2_10 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_11, self.u2_11, self.v1_11, self.v2_11, self.y1_11, self.y2_11, self.o_11 = self.oscillator_next( u1=self.u1_11, u2=self.u2_11, v1=self.v1_11, v2=self.v2_11, y1=self.y1_11, y2=self.y2_11, f1=self.f1_11, f2=self.f2_11, s1=self.s1_11, s2=self.s2_11, bias=self.bias_11, gain=self.gain_11, dt=self.lower_control_dt) # Calculate next state of oscillator 12 # w_ij -> j=1 (oscillator 1) is master, i=12 (oscillator 12) is slave self.w_121 = -1.0 self.f1_12, self.f2_12 = 0.0, 0.0 self.s1_12, self.s2_12 = self.w_121 * self.u1_1, self.w_121 * self.u2_1 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_12, self.u2_12, self.v1_12, self.v2_12, self.y1_12, self.y2_12, self.o_12 = self.oscillator_next( u1=self.u1_12, u2=self.u2_12, v1=self.v1_12, v2=self.v2_12, y1=self.y1_12, y2=self.y2_12, f1=self.f1_12, f2=self.f2_12, s1=self.s1_12, s2=self.s2_12, bias=self.bias_12, gain=self.gain_12, dt=self.lower_control_dt) # Calculate next state of oscillator 13 # w_ij -> j=1 (oscillator 1) is master, i=13 (oscillator 13) is slave self.w_131 = 1.0 self.f1_13, self.f2_13 = 0.0, 0.0 self.s1_13, self.s2_13 = self.w_131 * self.u1_1, self.w_131 * self.u2_1 # s1_i = w_ij*u1_j, s2_i = w_ij*u2_j self.u1_13, self.u2_13, self.v1_13, self.v2_13, self.y1_13, self.y2_13, self.o_13 = self.oscillator_next( u1=self.u1_13, u2=self.u2_13, v1=self.v1_13, v2=self.v2_13, y1=self.y1_13, y2=self.y2_13, f1=self.f1_13, f2=self.f2_13, s1=self.s1_13, s2=self.s2_13, bias=self.bias_13, gain=self.gain_13, dt=self.lower_control_dt) # Set the joint positions self.current_angles = { 'l_hip_y': self.o_2, 'r_hip_y': self.o_3, 'l_knee_y': self.o_4, 'r_knee_y': self.o_5, 'l_ankle_y': self.o_6, 'r_ankle_y': self.o_7, 'l_hip_x': self.o_8, 'l_ankle_x': self.o_9, 'r_hip_x': self.o_10, 'r_ankle_x': self.o_11, 'l_shoulder_y': self.o_12, 'r_shoulder_y': self.o_13 } self.robot_handle.set_angles(self.current_angles) time.sleep(self.lower_control_dt) # Check if the robot has fallen if self.monitor_thread.fallen: log('[OSC] Robot has fallen') break # Outside the loop it means that either time is over or robot has fallen self.terminal = True log('[OSC] Oscillator thread up time: {}'.format(self.up_t)) # Run until stopped while not self.stop_thread: time.sleep(1.0)
def calc_fitness(start_x, start_y, start_z, end_x, end_y, end_z, avg_z, up_time, fitness_option): log('[FIT] Executing calc_fitness with fitness_option: {0}'.format( fitness_option)) log('[FIT] Other arguments:') log('[FIT] start_x={0}, start_y={1}, start_z={2}'.format( start_x, start_y, start_z)) log('[FIT] end_x={0}, end_y={1}, end_z={2}'.format(end_x, end_y, end_z)) log('[FIT] avg_z={0}'.format(avg_z)) log('[FIT] up_time={0}'.format(up_time)) x_distance = end_x - start_x y_distance = end_y - start_y x_vel = x_distance / up_time # (metres/second) fitness = 0.0 if fitness_option == 1: # Fitness is the distance moved in the x direction (metres) fitness = x_distance log('[FIT] fitness = x_distance') elif fitness_option == 2: # Fitness is the distance moved in the x direction (metres) + up_time (minutes) fitness = x_distance + up_time / 60.0 log('[FIT] fitness = x_distance + up_time/60.0') elif fitness_option == 3: # Fitness is the distance moved in the x direction * 0.3 (metres) + up_time (seconds) * 0.7 # This formula yielded the stable walk in open loop fitness = x_distance * 0.3 + up_time * 0.7 log('[FIT] fitness = x_distance*0.3 + up_time*0.7') elif fitness_option == 4: # Fitness is the straight line velocity minus a penalty for deviating from the straight line # Follows the formula in Cristiano's paper (coefficient values are aplha=80, gamma=100) fitness = 80.0 * x_vel - 100 * abs(y_distance) log('[FIT] fitness = 80.0*x_vel - 100*abs(y_distance)') elif fitness_option == 5: # Fitness is the sum of x-distance(m), time(minutes), x-vel(m/s) fitness = x_distance + (up_time / 60.0) + x_vel log('[FIT] fitness = x_distance + (up_time/60.0) + x_vel') elif fitness_option == 6: # Fitness is the distance moved in the x direction (metres) + up_time (seconds)*0.5 # This formula yielded the stable walk in open loop fitness = x_distance + up_time * 0.5 log('[FIT] fitness = x_distance + up_time*0.5') return fitness
def __init__(self, portnum, objname, height_threshold, force_threshold=10): """ Initializes the RobotMonitorThread thread :param portnum: Port number on which the VREP remote API is listening on the server (different than the one used to run the main robot simulation) :param objname: Object name which is to be monitored :height_threshold: If the object's height is lower than this value, the robot is considered to have falen """ # Call init of super class Thread.__init__(self) # Setup the log home_dir = os.path.expanduser('~') log('[MON] Monitor started') # Create a VrepIO object using a different port number than the one used to run the main robot simulation # Make sure that the VREP remote api on the server is listening on this port number # Additional ports for listening can be set up on the server side by editing the file remoteApiConnections.txt self.vrepio_obj = VrepIO(vrep_port=portnum) # The object to be monitored self.objname = objname # Threshold for falling self.height_threshold = height_threshold # Threshold for force sensor self.force_threshold = force_threshold # The position of the object self.objpos = None # Initialize the foot sensor forces self.l1 = 0.0 self.l2 = 0.0 self.l3 = 0.0 self.l4 = 0.0 self.l_heel_current = 0.0 self.l_heel_previous = 0.0 self.r1 = 0.0 self.r2 = 0.0 self.r3 = 0.0 self.r4 = 0.0 self.r_heel_current = 0.0 self.r_heel_previous = 0.0 # Lists to hold the foot positions as ('foot type',x,y) tuples self.foot_position = list() # Variable to store average foot step self.avg_footstep = 0.0 # Flag for phase reset self.phase_reset = False # The x,y,z coordinates of the position self.x = None self.y = None self.z = None # The starting time self.start_time = time.time() self.up_time = 0.0 # The average height of the robot self.avg_z = 0.0 # A list to store the height at each second self.z_list = list() # A flag which can stop the thread self.stop_flag = False # Lists to store torso orientations self.torso_orientation_alpha = list() self.torso_orientation_beta = list() self.torso_orientation_gamma = list() # Variables to store the torso orientation variance self.var_torso_orientation_alpha = 0.0 self.var_torso_orientation_beta = 0.0 self.var_torso_orientation_gamma = 0.0 # A flag to indicate if the robot has fallen self.fallen = False
class MatsuokaAngleFeedbackEnv(gym.Env): def gym_init(self): log('[ENV] gym_init called') # Gym initialization # Actions - [gain_factor_l_hip_y, gain_factor_r_hip_y] # States - [torso_alpha, torso_beta, torso_gamma, d_torso_alpha, d_torso_beta, d_torso_gamma, torso_x, torso_y, d_torso_x, d_torso_y] # Set the max and min gain factor # The gain factor is multiplied with the joint gain self.gain_factor_max = 1.0 self.gain_factor_min = 1.0 # Initialize the gain factors self.gain_factor_l_hip_y = 1.0 self.gain_factor_r_hip_y = 1.0 # Initialize the action and observation spaces obs = np.array([np.inf] * 10) self.action_space = spaces.Box(low=np.array([0.75, 0.75]), high=np.array([1.0, 1.0])) self.observation_space = spaces.Box(-obs, obs) # Variable for the number of steps in the episode self.step_counter = 0 def scale_actions(self, actions): """ Scales the actions to be withing the gain factor limits The actions are squashed using a sigmoid activation function but due to addition of noise turing training, their values may lie outside the [0,1] range 0 and below is set to gain_factor_min 1 and above is set to gain_factor_max In-between values are scaled to a value between max and min limits :param actions: :return: scaled actions """ action_1 = actions[0] action_2 = actions[1] scaled_action_1 = 0.0 scaled_action_2 = 0.0 if action_1 <= 0.0: scaled_action_1 = self.gain_factor_min elif action_1 >= 1.0: scaled_action_1 = self.gain_factor_max elif 0.0 < action_1 < 1.0: scaled_action_1 = self.gain_factor_min + ( (action_1 - 0.0) / (1.0 - 0.0)) * (self.gain_factor_max - self.gain_factor_min) if action_2 <= 0.0: scaled_action_2 = self.gain_factor_min elif action_2 >= 1.0: scaled_action_2 = self.gain_factor_max elif 0.0 < action_2 < 1.0: scaled_action_2 = self.gain_factor_min + ( (action_2 - 0.0) / (1.0 - 0.0)) * (self.gain_factor_max - self.gain_factor_min) return [scaled_action_1, scaled_action_2] def matsuoka_init(self): # Set the home directory self.home_dir = os.path.expanduser('~') log('[ENV] matsuoka_init called') # Max time to walk (in seconds) self.max_walk_time = 20 # Variables from the best chromosome position_vector = [ 0.7461913734531209, 0.8422944031253159, 0.07043758116681641, 0.14236621222553963, 0.48893497409925746, 0.5980055418720059, 0.740811806645801, -0.11618361090424223, 0.492832184960149, -0.2949145038394889, 0.175450703085948, -0.3419733470484183 ] self.kf = position_vector[0] self.GAIN1 = position_vector[1] self.GAIN2 = position_vector[2] self.GAIN3 = position_vector[3] self.GAIN4 = position_vector[4] self.GAIN5 = position_vector[5] self.GAIN6 = position_vector[6] self.BIAS1 = position_vector[7] self.BIAS2 = position_vector[8] self.BIAS3 = position_vector[9] self.BIAS4 = position_vector[10] self.k = position_vector[11] # Create the robot handle # Try to connect to VREP try_counter = 0 try_max = 5 self.robot_handle = None while self.robot_handle is None: # Close existing connections if any pypot.vrep.close_all_connections() try: log('[ENV] Trying to create robot handle (attempt: {0} of {1})' .format(try_counter, try_max)) try_counter += 1 self.robot_handle = Nico( sync_sleep_time=0.1, motor_config=os.path.join( self.home_dir, 'computing/repositories/hierarchical_bipedal_controller/motor_configs/nico_humanoid_full_v1.json' ), vrep=True, vrep_host='127.0.0.1', vrep_port=19997, vrep_scene=os.path.join( self.home_dir, 'computing/repositories/hierarchical_bipedal_controller/vrep_scenes/NICO-Simplified-July2017_standing_Foot_sensors_v4_no_graphs.ttt' )) except Exception, e: log('[ENV] Could not connect to VREP') log('[ENV] Error: {0}'.format(e.message)) time.sleep(1.0) if try_counter > try_max: log('[ENV] Unable to create robot handle after {0} tries'. format(try_max)) exit(1) if self.robot_handle is not None: log('[ENV] Successfully connected to VREP') # Start the monitoring thread self.monitor_thread = RobotMonitorThread( portnum=19998, objname='torso_11_respondable', height_threshold=0.3) self.monitor_thread.start() log('[ENV] Started monitoring thread') # Wait 1s for the monitoring thread time.sleep(1.0) # Note the current position self.start_pos_x = self.monitor_thread.x self.start_pos_y = self.monitor_thread.y self.start_pos_z = self.monitor_thread.z # Strange error handler if self.start_pos_y is None: self.start_pos_x = 0.0 if self.start_pos_y is None: self.start_pos_y = 0.0 if self.start_pos_z is None: self.start_pos_z = 0.0 # Set up the oscillator constants self.tau = 0.2800 self.tau_prime = 0.4977 self.beta = 2.5000 self.w_0 = 2.2829 self.u_e = 0.4111 self.m1 = 1.0 self.m2 = 1.0 self.a = 1.0 # Modify the time constants based on kf self.tau *= self.kf self.tau_prime *= self.kf # Step times self.higher_control_dt = 0.1 self.lower_control_dt = 0.01 # Set the oscillator variables # Oscillator 1 (pacemaker) self.u1_1, self.u2_1, self.v1_1, self.v2_1, self.y1_1, self.y2_1, self.o_1, self.gain_1, self.bias_1 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0 # Oscillator 2 self.u1_2, self.u2_2, self.v1_2, self.v2_2, self.y1_2, self.y2_2, self.o_2, self.gain_2, self.bias_2 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN1, self.BIAS1 # Oscillator 3 self.u1_3, self.u2_3, self.v1_3, self.v2_3, self.y1_3, self.y2_3, self.o_3, self.gain_3, self.bias_3 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN1, self.BIAS1 # Oscillator 4 self.u1_4, self.u2_4, self.v1_4, self.v2_4, self.y1_4, self.y2_4, self.o_4, self.gain_4, self.bias_4 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN3, self.BIAS2 # Oscillator 5 self.u1_5, self.u2_5, self.v1_5, self.v2_5, self.y1_5, self.y2_5, self.o_5, self.gain_5, self.bias_5 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN3, self.BIAS2 # Oscillator 6 self.u1_6, self.u2_6, self.v1_6, self.v2_6, self.y1_6, self.y2_6, self.o_6, self.gain_6, self.bias_6 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN2, self.BIAS3 # Oscillator 7 self.u1_7, self.u2_7, self.v1_7, self.v2_7, self.y1_7, self.y2_7, self.o_7, self.gain_7, self.bias_7 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN2, self.BIAS3 # Oscillator 8 self.u1_8, self.u2_8, self.v1_8, self.v2_8, self.y1_8, self.y2_8, self.o_8, self.gain_8, self.bias_8 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN4, 0.0 # Oscillator 9 self.u1_9, self.u2_9, self.v1_9, self.v2_9, self.y1_9, self.y2_9, self.o_9, self.gain_9, self.bias_9 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN5, 0.0 # Oscillator 10 self.u1_10, self.u2_10, self.v1_10, self.v2_10, self.y1_10, self.y2_10, self.o_10, self.gain_10, self.bias_10 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN4, 0.0 # Oscillator 11 self.u1_11, self.u2_11, self.v1_11, self.v2_11, self.y1_11, self.y2_11, self.o_11, self.gain_11, self.bias_11 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN5, 0.0 # Oscillator 12 self.u1_12, self.u2_12, self.v1_12, self.v2_12, self.y1_12, self.y2_12, self.o_12, self.gain_12, self.bias_12 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN6, self.BIAS4 # Oscillator 13 self.u1_13, self.u2_13, self.v1_13, self.v2_13, self.y1_13, self.y2_13, self.o_13, self.gain_13, self.bias_13 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN6, self.BIAS4 # Set the joints to the initial bias positions - use slow angle setter initial_bias_angles = { 'l_hip_y': self.bias_2, 'r_hip_y': self.bias_3, 'l_knee_y': self.bias_4, 'r_knee_y': self.bias_5, 'l_ankle_y': self.bias_6, 'r_ankle_y': self.bias_7, 'l_shoulder_y': self.bias_12, 'r_shoulder_y': self.bias_13 } self.robot_handle.set_angles_slow(target_angles=initial_bias_angles, duration=5.0, step=0.01) # Sleep for 2 seconds to let any oscillations to die down time.sleep(2.0) log('[ENV] Resetting monitoring thread timer') # Reset the timer of the monitor self.monitor_thread.reset_timer() # New variable for logging up time, since monitor thread is not accurate some times self.up_t = 0.0
def oscillator_nw(position_vector, max_time=20.0, fitness_option=6): home_dir = os.path.expanduser('~') log('[OSC] Running oscillator_4.oscillator_nw') # Extract the elements from the position vector kf = position_vector[0] GAIN1 = position_vector[1] GAIN2 = position_vector[2] GAIN3 = position_vector[3] GAIN4 = position_vector[4] GAIN5 = position_vector[5] GAIN6 = position_vector[6] BIAS1 = position_vector[7] BIAS2 = position_vector[8] BIAS3 = position_vector[9] BIAS4 = position_vector[10] k_hip_y = position_vector[11] k_knee_y = position_vector[12] k_ankle_y = position_vector[13] k_hip_x = position_vector[14] k_ankle_x = position_vector[15] k_shoulder_y = position_vector[16] log('[OSC] Printing chromosome') log('[OSC] kf:{0} GAIN1:{1} GAIN2:{2} GAIN3:{3} GAIN4:{4} GAIN5:{5} GAIN6:{6} BIAS1:{7} BIAS2:{8} BIAS3:{9} BIAS4:{10} k_hip_y:{11} k_knee_y:{12} k_ankle_y:{13} k_hip_x:{14} k_ankle_x:{15} k_shoulder_y:{16}' .format(kf, GAIN1, GAIN2, GAIN3, GAIN4, GAIN5, GAIN6, BIAS1, BIAS2, BIAS3, BIAS4, k_hip_y, k_knee_y, k_ankle_y, k_hip_x, k_ankle_x, k_shoulder_y)) log('[OSC] {0}'.format(position_vector)) # Try to connect to VREP try_counter = 0 try_max = 5 robot_handle = None while robot_handle is None: try: log('[OSC] Trying to create robot handle (attempt: {0} of {1})'. format(try_counter, try_max)) try_counter += 1 robot_handle = Nico( sync_sleep_time=0.1, motor_config=os.path.join( home_dir, 'computing/repositories/hierarchical_bipedal_controller/motor_configs/nico_humanoid_full_v1.json' ), vrep=True, vrep_host='127.0.0.1', vrep_port=19997, vrep_scene=os.path.join( home_dir, 'computing/repositories/hierarchical_bipedal_controller/vrep_scenes/NICO-Simplified-July2017_standing_Foot_sensors_v4_no_graphs.ttt' )) except Exception, e: log('[OSC] Could not connect to VREP') log('[OSC] Error: {0}'.format(e.message)) time.sleep(1.0) if try_counter > try_max: log('[OSC] Unable to create robot handle after {0} tries'.format( try_max)) exit(1)
class Oscillator3Thread(threading.Thread): def __init__(self): # Call init of superclass threading.Thread.__init__(self) # Set the home directory self.home_dir = os.path.expanduser('~') # Set the name of the VREP object self.vrep_body_object = 'torso_11_respondable' # This is the lowest possible gain that can be set, the highest possible gain in 1.0 (no change) self.lowest_possible_gain = LOWEST_POSSIBLE_GAIN # Initially the 2 gain factors are set to 1.0 (no change) # These gain factors will be set by the RL algorithm (using the action method) self.gain_factor_l_hip_y = 1.0 self.gain_factor_r_hip_y = 1.0 # Variable to indicate if terminal state has been reached self.terminal = False # Variable to stop the thread self.stop_thread = False # Max time to walk (in seconds) self.max_walk_time = MAX_WALK_TIME # Variables from the best chromosome position_vector = BEST_CHROMOSOME self.kf = position_vector[0] self.GAIN1 = position_vector[1] self.GAIN2 = position_vector[2] self.GAIN3 = position_vector[3] self.GAIN4 = position_vector[4] self.GAIN5 = position_vector[5] self.GAIN6 = position_vector[6] self.BIAS1 = position_vector[7] self.BIAS2 = position_vector[8] self.BIAS3 = position_vector[9] self.BIAS4 = position_vector[10] self.k = position_vector[11] # Create the robot handle # Try to connect to VREP try_counter = 0 try_max = 5 self.robot_handle = None while self.robot_handle is None: # Close existing connections if any pypot.vrep.close_all_connections() try: log('[OSC] Trying to create robot handle (attempt: {0} of {1})' .format(try_counter, try_max)) try_counter += 1 self.robot_handle = Nico( sync_sleep_time=0.1, motor_config=os.path.join( self.home_dir, 'computing/repositories/hierarchical_bipedal_controller/motor_configs/nico_humanoid_full_v1.json' ), vrep=True, vrep_host='127.0.0.1', vrep_port=19997, vrep_scene=os.path.join( self.home_dir, 'computing/repositories/hierarchical_bipedal_controller/vrep_scenes/NICO-Simplified-July2017_standing_Foot_sensors_v4_no_graphs_with_path.ttt' )) except Exception, e: log('[OSC] Could not connect to VREP') log('[OSC] Error: {0}'.format(e.message)) time.sleep(1.0) if try_counter > try_max: log('[OSC] Unable to create robot handle after {0} tries'. format(try_max)) exit(1) if self.robot_handle is not None: log('[OSC] Successfully connected to VREP') # Start the monitoring thread self.monitor_thread = RobotMonitorThread( portnum=19998, objname='torso_11_respondable', height_threshold=0.3) self.monitor_thread.start() log('[OSC] Started monitoring thread') log('[OSC] Lowest possible gain: {}'.format(LOWEST_POSSIBLE_GAIN)) # Wait 1s for the monitoring thread time.sleep(1.0) # Note the current position self.start_pos_x = self.monitor_thread.x self.start_pos_y = self.monitor_thread.y self.start_pos_z = self.monitor_thread.z # Strange error handler if self.start_pos_y is None: self.start_pos_x = 0.0 if self.start_pos_y is None: self.start_pos_y = 0.0 if self.start_pos_z is None: self.start_pos_z = 0.0 # Set up the oscillator constants self.tau = 0.2800 self.tau_prime = 0.4977 self.beta = 2.5000 self.w_0 = 2.2829 self.u_e = 0.4111 self.m1 = 1.0 self.m2 = 1.0 self.a = 1.0 # Modify the time constants based on kf self.tau *= self.kf self.tau_prime *= self.kf # Step times self.lower_control_dt = 0.01 # Set the oscillator variables # Oscillator 1 (pacemaker) self.u1_1, self.u2_1, self.v1_1, self.v2_1, self.y1_1, self.y2_1, self.o_1, self.gain_1, self.bias_1 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0 # Oscillator 2 self.u1_2, self.u2_2, self.v1_2, self.v2_2, self.y1_2, self.y2_2, self.o_2, self.gain_2, self.bias_2 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN1, self.BIAS1 # Oscillator 3 self.u1_3, self.u2_3, self.v1_3, self.v2_3, self.y1_3, self.y2_3, self.o_3, self.gain_3, self.bias_3 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN1, self.BIAS1 # Oscillator 4 self.u1_4, self.u2_4, self.v1_4, self.v2_4, self.y1_4, self.y2_4, self.o_4, self.gain_4, self.bias_4 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN3, self.BIAS2 # Oscillator 5 self.u1_5, self.u2_5, self.v1_5, self.v2_5, self.y1_5, self.y2_5, self.o_5, self.gain_5, self.bias_5 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN3, self.BIAS2 # Oscillator 6 self.u1_6, self.u2_6, self.v1_6, self.v2_6, self.y1_6, self.y2_6, self.o_6, self.gain_6, self.bias_6 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN2, self.BIAS3 # Oscillator 7 self.u1_7, self.u2_7, self.v1_7, self.v2_7, self.y1_7, self.y2_7, self.o_7, self.gain_7, self.bias_7 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN2, self.BIAS3 # Oscillator 8 self.u1_8, self.u2_8, self.v1_8, self.v2_8, self.y1_8, self.y2_8, self.o_8, self.gain_8, self.bias_8 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN4, 0.0 # Oscillator 9 self.u1_9, self.u2_9, self.v1_9, self.v2_9, self.y1_9, self.y2_9, self.o_9, self.gain_9, self.bias_9 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN5, 0.0 # Oscillator 10 self.u1_10, self.u2_10, self.v1_10, self.v2_10, self.y1_10, self.y2_10, self.o_10, self.gain_10, self.bias_10 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN4, 0.0 # Oscillator 11 self.u1_11, self.u2_11, self.v1_11, self.v2_11, self.y1_11, self.y2_11, self.o_11, self.gain_11, self.bias_11 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN5, 0.0 # Oscillator 12 self.u1_12, self.u2_12, self.v1_12, self.v2_12, self.y1_12, self.y2_12, self.o_12, self.gain_12, self.bias_12 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN6, self.BIAS4 # Oscillator 13 self.u1_13, self.u2_13, self.v1_13, self.v2_13, self.y1_13, self.y2_13, self.o_13, self.gain_13, self.bias_13 = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, self.GAIN6, self.BIAS4 # Set the joints to the initial bias positions - use slow angle setter initial_bias_angles = { 'l_hip_y': self.bias_2, 'r_hip_y': self.bias_3, 'l_knee_y': self.bias_4, 'r_knee_y': self.bias_5, 'l_ankle_y': self.bias_6, 'r_ankle_y': self.bias_7, 'l_shoulder_y': self.bias_12, 'r_shoulder_y': self.bias_13 } self.robot_handle.set_angles_slow(target_angles=initial_bias_angles, duration=5.0, step=0.01) # Sleep for 2 seconds to let any oscillations to die down time.sleep(2.0) # Reset the timer of the monitor self.monitor_thread.reset_timer() # New variable for logging up time, since monitor thread is not accurate some times self.up_t = 0.0