def fitness_minitaur(chromosome, steps):
    """Return the fitness of a chromosome on the minitaur sine-gait task.

    Chromosome layout:
      chromosome[0]: speed (sine-wave frequency multiplier)
      chromosome[1]: time_step (gait phase advanced per simulation step)
      chromosome[2]: amplitude1 (swing amplitude of the first leg pair)
      chromosome[3]: steering_amplitude (left/right asymmetry)
      chromosome[4]: amplitude2 (swing amplitude of the second leg pair)

    Args:
      chromosome: sequence of at least 5 floats as described above.
      steps: maximum number of simulation steps to run.

    Returns:
      Accumulated environment reward, minus a fall penalty, plus the gait
      speed (speed is rewarded as a secondary objective).
    """
    randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=False,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        hard_reset=False,
        env_randomizer=randomizer,
        on_rack=False)
    speed = chromosome[0]
    time_step = chromosome[1]
    amplitude1 = chromosome[2]
    steering_amplitude = chromosome[3]
    amplitude2 = chromosome[4]
    sum_reward = 0
    for step_counter in range(steps):
        t = step_counter * time_step
        phase = t * speed
        # Asymmetrical sine gaits on opposite legs steer the minitaur.
        a1 = math.sin(phase) * (amplitude1 + steering_amplitude)
        a2 = math.sin(phase + math.pi) * (amplitude1 - steering_amplitude)
        a3 = math.sin(phase) * amplitude2
        a4 = math.sin(phase + math.pi) * amplitude2
        action = [a1, a2, a2, a1, a3, a4, a4, a3]
        _, reward, done, _ = environment.step(action)
        sum_reward += reward
        if done:
            # Penalty for falling. Bug fix: the original ADDED 5 here even
            # though its own comment said "castigo por caerse" (punishment
            # for falling) — rewarding the fall instead of penalizing it.
            sum_reward -= 5
            break
    environment.reset()
    # Secondary objective: favor faster gaits.
    return sum_reward + speed
def pybullet_humanoid():
    """Configuration for training on the 'HumanoidBulletEnv-v0' gym environment.

    Returns:
      A dict of configuration values (this function's locals), following the
      config-module pattern where each entry becomes a hyperparameter.
    """
    # Start from the shared defaults. NOTE(review): locals().update(...) does
    # not rebind function locals in CPython, so the defaults may not actually
    # reach the returned dict — confirm against how default() is consumed.
    locals().update(default())
    # The randomizer is not used by the humanoid env itself, but it is
    # exported as part of the returned configuration.
    randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
    env = 'HumanoidBulletEnv-v0'
    max_length = 1000  # maximum episode length in steps
    steps = 3e8  # 300M total training steps
    return locals()
def ResetPoseExample():
    """Keep the minitaur standing still in its reset pose."""
    steps = 1000
    env_randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        leg_model_enabled=False,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        accurate_motor_model_enabled=True,
        motor_overheat_protection=True,
        env_randomizer=env_randomizer,
        hard_reset=False)
    # Hold all eight motors at pi/2 on every step.
    pose = [math.pi / 2] * 8
    for _ in range(steps):
        _, _, episode_over, _ = environment.step(pose)
        if episode_over:
            break
    environment.reset()
def SinePolicyExample():
    """Walk the minitaur with an optimized sine gait (no steering)."""
    env_randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        hard_reset=False,
        env_randomizer=env_randomizer,
        on_rack=False)
    sum_reward = 0
    steps = 500
    # Bounds kept for reference; the optimized values below are used instead.
    amplitude_1_bound = 0.1
    amplitude_2_bound = 0.1
    # Gait parameters found by optimization.
    speed = 95.76487704867183
    time_step = 0.5180280589663037
    amplitude1 = 0.7958106754045535
    amplitude2 = 0.5783009482997857
    steering_amplitude = 0  # walk straight
    for counter in range(steps):
        t = counter * time_step
        swing = math.sin(t * speed)
        counter_swing = math.sin(t * speed + math.pi)
        # Asymmetrical sine gaits on different legs can steer the minitaur.
        a1 = swing * (amplitude1 + steering_amplitude)
        a2 = counter_swing * (amplitude1 - steering_amplitude)
        a3 = swing * amplitude2
        a4 = counter_swing * amplitude2
        _, reward, done, _ = environment.step(
            [a1, a2, a2, a1, a3, a4, a4, a3])
        sum_reward += reward
        # Log the running reward every step.
        print(sum_reward)
        if done:
            break
    environment.reset()
def DDPG_Example():
    """Train a DDPG actor/critic pair on the minitaur environment."""
    env_randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        hard_reset=False,
        env_randomizer=env_randomizer,
        shake_weight=0.0,
        drift_weight=0.0,
        energy_weight=0.0,
        on_rack=False)
    observation_size = 28
    action_size = 4
    # Ornstein-Uhlenbeck process for temporally correlated exploration noise.
    exploration_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_size))
    with tf.Session() as session:
        actor = ActorNetwork(session,
                             observation_size,
                             action_size,
                             learning_rate=float(LR_A),
                             tau=TAU,
                             batch_size=BATCH_SIZE)
        critic = CriticNetwork(session,
                               observation_size,
                               action_size,
                               learning_rate=float(LR_C),
                               tau=TAU,
                               gamma=float(GAMMA),
                               num_actor_vars=actor.get_num_trainable_vars())
        train(session, environment, actor, critic, exploration_noise,
              load_weights=False)
def pybullet_minitaur():
    """Configuration specific to minitaur_gym_env.MinitaurBulletEnv class.

    Returns:
      A dict of configuration values (this function's locals), following the
      config-module pattern where each entry becomes a hyperparameter.
    """
    # Start from the shared defaults. NOTE(review): locals().update(...) does
    # not rebind function locals in CPython, so the defaults may not actually
    # reach the returned dict — confirm against how default() is consumed.
    locals().update(default())
    randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
    # Factory (not an instance) so each worker can build its own environment.
    env = functools.partial(minitaur_gym_env.MinitaurBulletEnv,
                            accurate_motor_model_enabled=True,
                            motor_overheat_protection=True,
                            pd_control_enabled=True,
                            env_randomizer=randomizer,
                            render=False)
    max_length = 1000  # maximum episode length in steps
    steps = 3e7  # 30M total training steps
    return locals()
def ResetPoseExample():
    """An example where the minitaur holds its reset pose without moving."""
    step_budget = 1000
    rand = minitaur_env_randomizer.MinitaurEnvRandomizer()
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        leg_model_enabled=False,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        accurate_motor_model_enabled=True,
        motor_overheat_protection=True,
        env_randomizer=rand,
        hard_reset=False)
    # Command every motor to pi/2 each step, i.e. the standing pose.
    standing_action = [math.pi / 2] * 8
    for _ in range(step_budget):
        _, _, done, _ = environment.step(standing_action)
        if done:
            break
    environment.reset()
def SinePolicyExample():
    """Walk the minitaur with a sine gait, steering left then right."""
    rand = minitaur_env_randomizer.MinitaurEnvRandomizer()
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        hard_reset=False,
        env_randomizer=rand,
        on_rack=False)
    sum_reward = 0
    steps = 20000
    amplitude_1_bound = 0.1
    amplitude_2_bound = 0.1
    speed = 1
    time_step = 0.01
    for tick in range(steps):
        t = tick * time_step
        amplitude1 = amplitude_1_bound
        amplitude2 = amplitude_2_bound
        # Steer one way for 10s, the other way for 10s, then go straight.
        steering_amplitude = 0.1 if t < 10 else (-0.1 if t < 20 else 0)
        # Applying asymmetrical sine gaits to different legs steers the robot.
        swing = math.sin(t * speed)
        counter_swing = math.sin(t * speed + math.pi)
        a1 = swing * (amplitude1 + steering_amplitude)
        a2 = counter_swing * (amplitude1 - steering_amplitude)
        a3 = swing * amplitude2
        a4 = counter_swing * amplitude2
        _, reward, done, _ = environment.step(
            [a1, a2, a2, a1, a3, a4, a4, a3])
        sum_reward += reward
        if done:
            break
    environment.reset()
# Make the repository root importable before loading project modules.
import sys
sys.path.insert(0, '../..')
import libs_agents
import numpy
import time
import gym
from pybullet_envs.bullet import minitaur_gym_env
from pybullet_envs.bullet import minitaur_env_randomizer

# Module-level side effect: importing this file opens a rendered minitaur
# simulation window immediately.
randomizer = (minitaur_env_randomizer.MinitaurEnvRandomizer())
env = minitaur_gym_env.MinitaurBulletEnv(render=True,
                                         leg_model_enabled=False,
                                         motor_velocity_limit=numpy.inf,
                                         pd_control_enabled=True,
                                         accurate_motor_model_enabled=True,
                                         motor_overheat_protection=True,
                                         env_randomizer=randomizer,
                                         hard_reset=False)


class Wrapper(gym.Wrapper):
    """Gym wrapper that rescales agent actions before forwarding them."""

    def __init__(self, env):
        gym.Wrapper.__init__(self, env)

    def step(self, action):
        # Affine map into motor angles: pi * (0.1*action + 0.5), i.e. angles
        # centered at pi/2 with a 0.1*pi amplitude. Assumes `action` is a
        # numpy-compatible array roughly in [-1, 1] — TODO confirm at caller.
        action_ = numpy.pi * (0.1 * action + 0.5)
        return self.env.step(action_)
def __init__(self,
             urdf_root=pybullet_data.getDataPath(),
             action_repeat=1,
             distance_weight=1.0,
             energy_weight=0.005,
             shake_weight=0.0,
             drift_weight=0.0,
             distance_limit=float("inf"),
             observation_noise_stdev=0.0,
             self_collision_enabled=True,
             motor_velocity_limit=np.inf,
             pd_control_enabled=False,  # not needed to be true if accurate motor model is enabled (has its own better PD)
             leg_model_enabled=True,
             accurate_motor_model_enabled=True,
             motor_kp=1.0,
             motor_kd=0.02,
             torque_control_enabled=False,
             motor_overheat_protection=True,
             hard_reset=True,
             on_rack=False,
             render=False,
             kd_for_pd_controllers=0.3,
             # NOTE(review): mutable default argument — this single randomizer
             # instance is created at import time and shared by every env that
             # does not pass its own.
             env_randomizer=minitaur_env_randomizer.MinitaurEnvRandomizer()):
    """Initialize the minitaur gym environment.

    Args:
      urdf_root: The path to the urdf data folder.
      action_repeat: The number of simulation steps before actions are applied.
      distance_weight: The weight of the distance term in the reward.
      energy_weight: The weight of the energy term in the reward.
      shake_weight: The weight of the vertical shakiness term in the reward.
      drift_weight: The weight of the sideways drift term in the reward.
      distance_limit: The maximum distance to terminate the episode.
      observation_noise_stdev: The standard deviation of observation noise.
      self_collision_enabled: Whether to enable self collision in the sim.
      motor_velocity_limit: The velocity limit of each motor.
      pd_control_enabled: Whether to use PD controller for each motor.
      leg_model_enabled: Whether to use a leg motor to reparameterize the
        action space.
      accurate_motor_model_enabled: Whether to use the accurate DC motor model.
      motor_kp: proportional gain for the accurate motor model.
      motor_kd: derivative gain for the accurate motor model.
      torque_control_enabled: Whether to use the torque control, if set to
        False, pose control will be used.
      motor_overheat_protection: Whether to shutdown the motor that has exerted
        large torque (OVERHEAT_SHUTDOWN_TORQUE) for an extended amount of time
        (OVERHEAT_SHUTDOWN_TIME). See ApplyAction() in minitaur.py for more
        details.
      hard_reset: Whether to wipe the simulation and load everything when reset
        is called. If set to false, reset just place the minitaur back to start
        position and set its pose to initial configuration.
      on_rack: Whether to place the minitaur on rack. This is only used to debug
        the walking gait. In this mode, the minitaur's base is hanged midair so
        that its walking gait is clearer to visualize.
      render: Whether to render the simulation.
      kd_for_pd_controllers: kd value for the pd controllers of the motors
      env_randomizer: An EnvRandomizer to randomize the physical properties
        during reset().
    """
    # Simulation parameters (may be rescaled below when PD control is on).
    self._time_step = 0.01
    self._action_repeat = action_repeat
    self._num_bullet_solver_iterations = 300
    self._urdf_root = urdf_root
    self._self_collision_enabled = self_collision_enabled
    self._motor_velocity_limit = motor_velocity_limit
    self._observation = []
    self._env_step_counter = 0
    self._is_render = render
    self._last_base_position = [0, 0, 0]
    # Reward-term weights.
    self._distance_weight = distance_weight
    self._energy_weight = energy_weight
    self._drift_weight = drift_weight
    self._shake_weight = shake_weight
    self._distance_limit = distance_limit
    self._observation_noise_stdev = observation_noise_stdev
    self._action_bound = 1
    # Motor / control configuration.
    self._pd_control_enabled = pd_control_enabled
    self._leg_model_enabled = leg_model_enabled
    self._accurate_motor_model_enabled = accurate_motor_model_enabled
    self._motor_kp = motor_kp
    self._motor_kd = motor_kd
    self._torque_control_enabled = torque_control_enabled
    self._motor_overheat_protection = motor_overheat_protection
    self._on_rack = on_rack
    # Debug-camera defaults.
    self._cam_dist = 1.0
    self._cam_yaw = 0
    self._duckId = -1
    self._cam_pitch = -30
    # Force a full (hard) reset for the first reset() call below; the
    # caller-requested value is restored at the end of __init__.
    self._hard_reset = True
    self._kd_for_pd_controllers = kd_for_pd_controllers
    self._last_frame_time = 0.0
    print("urdf_root=" + self._urdf_root)
    self._env_randomizer = env_randomizer
    # PD control needs smaller time step for stability.
    if pd_control_enabled or accurate_motor_model_enabled:
        # NOTE(review): in Python 3 these two /= produce floats
        # (e.g. 300 / NUM_SUBSTEPS) — verify downstream consumers accept that.
        self._time_step /= NUM_SUBSTEPS
        self._num_bullet_solver_iterations /= NUM_SUBSTEPS
        self._action_repeat *= NUM_SUBSTEPS
    # GUI connection when rendering, headless (DIRECT) otherwise.
    if self._is_render:
        self._pybullet_client = bullet_client.BulletClient(
            connection_mode=pybullet.GUI)
    else:
        self._pybullet_client = bullet_client.BulletClient()
    self._seed()
    self.reset()
    # Observation bounds are padded by OBSERVATION_EPS on both sides.
    observation_high = (
        self.minitaur.GetObservationUpperBound() + OBSERVATION_EPS)
    observation_low = (
        self.minitaur.GetObservationLowerBound() - OBSERVATION_EPS)
    action_dim = 8
    action_high = np.array([self._action_bound] * action_dim)
    self.action_space = spaces.Box(-action_high, action_high)
    self.observation_space = spaces.Box(observation_low, observation_high)
    self.viewer = None
    self._hard_reset = hard_reset  # This assignment need to be after reset()
def SinePolicyExample():
    """Walk the minitaur with a sine gait, steering one way then the other."""
    env_randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
    environment = minitaur_gym_env.MinitaurBulletEnv(
        render=True,
        motor_velocity_limit=np.inf,
        pd_control_enabled=True,
        hard_reset=False,
        env_randomizer=env_randomizer,
        on_rack=False)
    sum_reward = 0
    steps = 20000
    # Leg swing amplitude bounds.
    amplitude_1_bound = 0.1
    amplitude_2_bound = 0.1
    # Sine-wave frequency multiplier.
    speed = 10
    time_step = 0.01
    for tick in range(steps):
        t = tick * time_step
        amplitude1 = amplitude_1_bound
        amplitude2 = amplitude_2_bound
        # Steering schedule: right for 10s, left for 10s, then straight.
        steering_amplitude = 0.1 if t < 10 else (-0.1 if t < 20 else 0)
        # Applying asymmetrical sine gaits to different legs can steer the
        # minitaur.
        swing = math.sin(t * speed)
        counter_swing = math.sin(t * speed + math.pi)
        a1 = swing * (amplitude1 + steering_amplitude)
        a2 = counter_swing * (amplitude1 - steering_amplitude)
        a3 = swing * amplitude2
        a4 = counter_swing * amplitude2
        _, reward, done, _ = environment.step(
            [a1, a2, a2, a1, a3, a4, a4, a3])
        sum_reward += reward
        if done:
            break
    environment.reset()