def fitness_minitaur(chromosome, steps):
    """Evaluate the fitness of a gait chromosome on the Minitaur sine-gait task.

    Chromosome layout (by index):
      0: speed, 1: time_step, 2: amplitude1, 3: steering_amplitude, 4: amplitude2

    Args:
      chromosome: indexable sequence of the five gait genes above.
      steps: number of simulation steps to run.

    Returns:
      Accumulated environment reward plus the speed gene (secondary objective),
      minus a penalty if the minitaur falls.
    """
    randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
    environment = minitaur_gym_env.MinitaurBulletEnv(render=False,
                                                     motor_velocity_limit=np.inf,
                                                     pd_control_enabled=True,
                                                     hard_reset=False,
                                                     env_randomizer=randomizer,
                                                     on_rack=False)
    sum_reward = 0
    for step_counter in range(steps):
        t = step_counter * chromosome[1]
        # Asymmetrical sine gaits on different legs steer the minitaur.
        a1 = math.sin(t * chromosome[0]) * (chromosome[2] + chromosome[3])
        a2 = math.sin(t * chromosome[0] + math.pi) * (chromosome[2] - chromosome[3])
        a3 = math.sin(t * chromosome[0]) * chromosome[4]
        a4 = math.sin(t * chromosome[0] + math.pi) * chromosome[4]
        action = [a1, a2, a2, a1, a3, a4, a4, a3]
        _, reward, done, _ = environment.step(action)
        sum_reward += reward
        if done:
            # BUG FIX: the original comment said "castigo por caerse"
            # (penalty for falling) but the code ADDED 5 reward; subtract
            # instead so falling is actually punished.
            sum_reward -= 5
            break
    environment.reset()
    fitness = sum_reward + chromosome[0]  # reward faster gaits as a tie-breaker
    return fitness
def DDPG_Example():
    """Train a DDPG actor/critic pair on the Minitaur bullet environment."""
    # Randomize dynamics each reset so the policy does not overfit one model.
    env_randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
    env = minitaur_gym_env.MinitaurBulletEnv(render=True,
                                             motor_velocity_limit=np.inf,
                                             pd_control_enabled=True,
                                             hard_reset=False,
                                             env_randomizer=env_randomizer,
                                             shake_weight=0.0,
                                             drift_weight=0.0,
                                             energy_weight=0.0,
                                             on_rack=False)
    # Fixed observation/action sizes for this Minitaur task.
    state_dim, action_dim = 28, 4
    # Temporally-correlated exploration noise for the deterministic actor.
    actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))
    with tf.Session() as sess:
        actor = ActorNetwork(sess, state_dim, action_dim,
                             learning_rate=float(LR_A),
                             tau=TAU,
                             batch_size=BATCH_SIZE)
        critic = CriticNetwork(sess, state_dim, action_dim,
                               learning_rate=float(LR_C),
                               tau=TAU,
                               gamma=float(GAMMA),
                               num_actor_vars=actor.get_num_trainable_vars())
        train(sess, env, actor, critic, actor_noise, load_weights=False)
def MotorOverheatExample():
    """Trigger the minitaur's motor overheat protection.

    The minitaur leans forward so the motors exceed their torque threshold;
    the protection kicks in after roughly one second.
    """
    environment = minitaur_gym_env.MinitaurBulletEnv(render=True,
                                                     leg_model_enabled=False,
                                                     motor_velocity_limit=np.inf,
                                                     motor_overheat_protection=True,
                                                     accurate_motor_model_enabled=True,
                                                     motor_kp=1.20,
                                                     motor_kd=0.00,
                                                     on_rack=False)
    # Per-motor 0.1 offset whose sign alternates with motor parity and flips
    # between front and back legs (same values as the original fill loop).
    action = [0.1 * (1 if i % 2 == 0 else -1) * (-1 if i < 4 else 1)
              for i in range(8)]
    steps = 500
    actions_and_observations = []
    for step_counter in range(steps):
        t = step_counter * 0.01  # matches the internal simulation timestep
        current_row = [t]
        current_row.extend(action)
        observation, _, _, _ = environment.step(action)
        current_row.extend(observation.tolist())
        actions_and_observations.append(current_row)
    environment.reset()
def make_env(env_name, seed=-1, render_mode=False):
    """Build the environment named by env_name; optionally render and seed it."""
    if env_name.startswith("RacecarBulletEnv"):
        print("bullet_racecar_started")
        env = racecarGymEnv.RacecarGymEnv(isDiscrete=False, renders=render_mode)
    elif env_name.startswith("MinitaurBulletEnv"):
        print("bullet_minitaur_started")
        env = minitaur_gym_env.MinitaurBulletEnv(render=render_mode)
    elif env_name.startswith("MinitaurDuckBulletEnv"):
        print("bullet_minitaur_duck_started")
        env = MinitaurDuckBulletEnv(render=render_mode)
    elif env_name.startswith("MinitaurBallBulletEnv"):
        print("bullet_minitaur_ball_started")
        env = MinitaurBallBulletEnv(render=render_mode)
    elif env_name.startswith("KukaBulletEnv"):
        print("bullet_kuka_grasping started")
        env = kukaGymEnv.KukaGymEnv(renders=render_mode, isDiscrete=False)
    else:
        # Roboschool registers its envs with gym on import and renders itself.
        if env_name.startswith("Roboschool"):
            import roboschool
        env = gym.make(env_name)
        if render_mode and not env_name.startswith("Roboschool"):
            env.render("human")
    if seed >= 0:
        env.seed(seed)
    return env
def __init__(self, env_name='HalfCheetah-v1',
             policy_params=None,
             num_workers=32,
             num_deltas=320,
             deltas_used=320,
             delta_std=0.02,
             logdir=None,
             rollout_length=1000,
             step_size=0.01,
             shift='constant zero',
             params=None,
             seed=123):
    """Set up an ARS learner: logging, env dimensions, shared noise, workers.

    Args:
      env_name: forwarded to the workers (the local env below is hard-coded
        to MinitaurBulletEnv — see the inline note).
      policy_params: dict describing the policy; only 'linear' is supported.
      num_workers: number of Ray rollout workers to spawn.
      num_deltas: perturbation directions sampled per update.
      deltas_used: top directions actually used in the gradient estimate.
      delta_std: std-dev of the exploration perturbations.
      logdir: output directory for logz.
      rollout_length: max steps per rollout.
      step_size: SGD step size.
      shift: reward-shift strategy label.
      params: full experiment config, saved alongside the logs.
      seed: base random seed; workers get seed + 7 * i.
    """
    # Configure experiment logging before anything else.
    logz.configure_output_dir(logdir)
    logz.save_params(params)

    # NOTE(review): env_name is ignored here — the env is hard-coded to the
    # Minitaur, matching the commented-out gym.make call.
    env = minitaur_gym_env.MinitaurBulletEnv()  # gym.make(env_name)

    self.timesteps = 0
    # Dimensions of the action/observation spaces, read from the env.
    self.action_size = env.action_space.shape[0]
    self.ob_size = env.observation_space.shape[0]
    self.num_deltas = num_deltas
    self.deltas_used = deltas_used
    self.rollout_length = rollout_length
    self.step_size = step_size
    self.delta_std = delta_std
    self.logdir = logdir
    self.shift = shift
    self.params = params
    self.max_past_avg_reward = float('-inf')
    self.num_episodes_used = float('inf')

    # create shared table for storing noise
    print("Creating deltas table.")
    deltas_id = create_shared_noise.remote()
    # Each consumer derives an independent stream from the shared table.
    self.deltas = SharedNoiseTable(ray.get(deltas_id), seed=seed + 3)
    print('Created deltas table.')

    # initialize workers with different random seeds
    print('Initializing workers.')
    self.num_workers = num_workers
    self.workers = [Worker.remote(seed + 7 * i,
                                  env_name=env_name,
                                  policy_params=policy_params,
                                  deltas=deltas_id,
                                  rollout_length=rollout_length,
                                  delta_std=delta_std) for i in range(num_workers)]

    # initialize policy
    if policy_params['type'] == 'linear':
        self.policy = LinearPolicy(policy_params)
        self.w_policy = self.policy.get_weights()
    else:
        raise NotImplementedError

    # initialize optimization algorithm
    self.optimizer = optimizers.SGD(self.w_policy, self.step_size)
    print("Initialization of ARS complete.")
def ResetPoseExample():
    """Hold the minitaur still in its reset pose for a fixed number of steps."""
    steps = 1000
    # Randomize dynamics on each reset.
    env_randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
    environment = minitaur_gym_env.MinitaurBulletEnv(render=True,
                                                     leg_model_enabled=False,
                                                     motor_velocity_limit=np.inf,
                                                     pd_control_enabled=True,
                                                     accurate_motor_model_enabled=True,
                                                     motor_overheat_protection=True,
                                                     env_randomizer=env_randomizer,
                                                     hard_reset=False)
    # Command every motor to pi/2 — the reset-pose angle.
    action = [math.pi / 2] * 8
    for _ in range(steps):
        _, _, done, _ = environment.step(action)
        if done:
            break
    environment.reset()
def SinePolicyExample():
    """Walk the minitaur with a sine gait using optimizer-tuned parameters."""
    env_randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
    environment = minitaur_gym_env.MinitaurBulletEnv(render=True,
                                                     motor_velocity_limit=np.inf,
                                                     pd_control_enabled=True,
                                                     hard_reset=False,
                                                     env_randomizer=env_randomizer,
                                                     on_rack=False)
    # Gait parameters from an optimization run (earlier candidates left in
    # the history: speed 99.95830805709147 / 96.03904323742279, etc.).
    steps = 500  # 20000
    speed = 95.76487704867183
    time_step = 0.5180280589663037
    amplitude1 = 0.7958106754045535
    amplitude2 = 0.5783009482997857
    steering_amplitude = 0
    sum_reward = 0
    for step_counter in range(steps):
        t = step_counter * time_step
        # Asymmetrical sine gaits on different legs steer the minitaur.
        a1 = math.sin(t * speed) * (amplitude1 + steering_amplitude)
        a2 = math.sin(t * speed + math.pi) * (amplitude1 - steering_amplitude)
        a3 = math.sin(t * speed) * amplitude2
        a4 = math.sin(t * speed + math.pi) * amplitude2
        _, reward, done, _ = environment.step([a1, a2, a2, a1, a3, a4, a4, a3])
        sum_reward += reward
        print(sum_reward)
        if done:
            break
    environment.reset()
def MotorOverheatExample():
    """Lean the minitaur forward until motor overheat protection triggers.

    The motors exceed their torque threshold and the protection fires in
    roughly one second.
    """
    environment = minitaur_gym_env.MinitaurBulletEnv(render=True,
                                                     leg_model_enabled=False,
                                                     motor_velocity_limit=np.inf,
                                                     motor_overheat_protection=True,
                                                     accurate_motor_model_enabled=True,
                                                     motor_kp=1.20,
                                                     motor_kd=0.00,
                                                     on_rack=False)
    # Per-motor 0.1 offset: the sign alternates with motor parity and flips
    # between the first and last four motors.
    action = [0.0] * 8
    for i in range(8):
        parity_sign = -1 if i % 2 == 0 else 1
        side_sign = -1 if i < 4 else 1
        action[i] = -0.1 * parity_sign * side_sign
    steps = 500
    actions_and_observations = []
    for step_counter in range(steps):
        time_step = 0.01  # matches the internal simulation timestep
        t = step_counter * time_step
        # Record [t, action..., observation...] for later analysis.
        row = [t]
        row.extend(action)
        observation, _, _, _ = environment.step(action)
        row.extend(observation.tolist())
        actions_and_observations.append(row)
    environment.reset()
def SineStandExample():
    """Make the minitaur stand and squat periodically on the floor.

    Used to validate the accurate motor model: the robot is commanded to sit
    and stand so measured motor trajectories, torques and gains can be
    compared between simulation and experiment.
    """
    environment = minitaur_gym_env.MinitaurBulletEnv(render=True,
                                                     leg_model_enabled=False,
                                                     motor_velocity_limit=np.inf,
                                                     motor_overheat_protection=True,
                                                     accurate_motor_model_enabled=True,
                                                     motor_kp=1.20,
                                                     motor_kd=0.02,
                                                     on_rack=False)
    steps = 1000
    amplitude = 0.5
    speed = 30
    time_step = 0.01  # matches the internal time step
    actions_and_observations = []
    for step_counter in range(steps):
        t = step_counter * time_step
        # Sine command centered on the pi/2 standing pose, same for all motors.
        action = [math.sin(speed * t) * amplitude + math.pi / 2] * 8
        observation, _, _, _ = environment.step(action)
        row = [t]
        row.extend(action)
        row.extend(observation.tolist())
        actions_and_observations.append(row)
    environment.reset()
def make_env(env_name, seed=-1, render_mode=False):
    """Dispatch env_name to the matching environment constructor."""
    if env_name.startswith("RacecarBulletEnv"):
        print("bullet_racecar_started")
        env = racecarGymEnv.RacecarGymEnv(isDiscrete=False, renders=render_mode)
    elif env_name.startswith("RocketLander"):
        from box2d.rocket import RocketLander
        env = RocketLander()
    elif env_name.startswith("BipedalWalker"):
        if env_name.startswith("BipedalWalkerHardcore"):
            from box2d.biped import BipedalWalkerHardcore
            env = BipedalWalkerHardcore()
        else:
            from box2d.biped import BipedalWalker
            env = BipedalWalker()
    elif env_name.startswith("MinitaurBulletEnv"):
        print("bullet_minitaur_started")
        env = minitaur_gym_env.MinitaurBulletEnv(render=render_mode)
    elif env_name.startswith("MinitaurDuckBulletEnv"):
        print("bullet_minitaur_duck_started")
        env = MinitaurDuckBulletEnv(render=render_mode)
    elif env_name.startswith("MinitaurBallBulletEnv"):
        print("bullet_minitaur_ball_started")
        env = MinitaurBallBulletEnv(render=render_mode)
    elif env_name.startswith("CartPoleSwingUp"):
        print("cartpole_swingup_started")
        from custom_envs.cartpole_swingup import CartPoleSwingUpEnv
        hardcore = env_name.startswith("CartPoleSwingUpHardcore")
        env = CartPoleSwingUpEnv(hardcore=hardcore)
    elif env_name.startswith("KukaBulletEnv"):
        print("bullet_kuka_grasping started")
        env = kukaGymEnv.KukaGymEnv(renders=render_mode, isDiscrete=False)
    else:
        # Roboschool envs register with gym on import and render themselves.
        if env_name.startswith("Roboschool"):
            import roboschool
        env = gym.make(env_name)
        if render_mode and not env_name.startswith("Roboschool"):
            env.render("human")
    if seed >= 0:
        env.seed(seed)
    return env
def main():
    """Replay a saved ARS linear policy on the Minitaur env and record videos.

    Loads the policy archive (weights plus the running mean/std of states),
    runs --num_rollouts episodes capped at 1000 steps each, and prints the
    return statistics.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('expert_policy_file', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument('--num_rollouts', type=int, default=1,
                        help='Number of expert rollouts')
    args = parser.parse_args()

    print('loading and building expert policy')
    # allow_pickle is required on numpy >= 1.16.3 to load the object array
    # that ARS saves (weights + normalization stats in one entry).
    lin_policy = np.load(args.expert_policy_file, allow_pickle=True)
    # BUG FIX: .items() returns a non-subscriptable view on Python 3;
    # materialize it before indexing.
    lin_policy = list(lin_policy.items())[0][1]
    M = lin_policy[0]
    # mean and std of state vectors estimated online by ARS.
    mean = lin_policy[1]
    std = lin_policy[2]

    env = minitaur_gym_env.MinitaurBulletEnv(render=True)  # gym.make(env_name)
    returns = []
    observations = []
    actions = []
    for i in range(args.num_rollouts):
        print('iter', i)
        obs = env.reset()
        log_id = pybullet.startStateLogging(
            pybullet.STATE_LOGGING_VIDEO_MP4,
            "/usr/local/google/home/jietan/Projects/ARS/data/minitaur{}.mp4".format(i))
        done = False
        totalr = 0.
        steps = 0
        while not done:
            # Linear policy on the normalized observation, clipped to [-1, 1].
            action = np.clip(np.dot(M, (obs - mean) / std), -1.0, 1.0)
            observations.append(obs)
            actions.append(action)
            obs, r, done, _ = env.step(action)
            totalr += r
            steps += 1
            if args.render:
                env.render()
            if steps % 100 == 0:
                print("%i/%i" % (steps, 1000))
            if steps >= 1000:
                break
        pybullet.stopStateLogging(log_id)
        returns.append(totalr)

    print('returns', returns)
    print('mean return', np.mean(returns))
    print('std of return', np.std(returns))
def make_env(env_name, seed=-1, render_mode=False):
    """Create racecar/minitaur bullet envs by name, else fall back to gym.make."""
    if env_name.startswith("RacecarBulletEnv"):
        print("bullet_racecar_started")
        env = racecarGymEnv.RacecarGymEnv(isDiscrete=False, renders=render_mode)
    elif env_name.startswith("MinitaurBulletEnv"):
        print("bullet_minitaur_started")
        env = minitaur_gym_env.MinitaurBulletEnv(render=render_mode)
    else:
        env = gym.make(env_name)
        # Generic Bullet envs need render("human") called before use.
        if render_mode and ("BulletEnv-v0" in env_name):
            env.render("human")
    if seed >= 0:
        env.seed(seed)
    return env
def ResetPoseExample():
    """Keep the minitaur standing still by commanding the reset pose."""
    steps = 1000
    env_randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
    environment = minitaur_gym_env.MinitaurBulletEnv(render=True,
                                                     leg_model_enabled=False,
                                                     motor_velocity_limit=np.inf,
                                                     pd_control_enabled=True,
                                                     accurate_motor_model_enabled=True,
                                                     motor_overheat_protection=True,
                                                     env_randomizer=env_randomizer,
                                                     hard_reset=False)
    # pi/2 on every motor is the reset-pose angle.
    reset_pose = [math.pi / 2] * 8
    for _ in range(steps):
        _, _, done, _ = environment.step(reset_pose)
        if done:
            break
    environment.reset()
def SinePolicyExample():
    """Walk the minitaur with a sine gait, steering one way then the other."""
    env_randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
    environment = minitaur_gym_env.MinitaurBulletEnv(render=True,
                                                     motor_velocity_limit=np.inf,
                                                     pd_control_enabled=True,
                                                     hard_reset=False,
                                                     env_randomizer=env_randomizer,
                                                     on_rack=False)
    steps = 20000
    time_step = 0.01
    speed = 1
    amplitude1 = 0.1  # amplitude_1_bound
    amplitude2 = 0.1  # amplitude_2_bound
    sum_reward = 0
    for step_counter in range(steps):
        t = step_counter * time_step
        # Steer one way for 10 s, the other way for 10 s, then go straight.
        if t < 10:
            steering_amplitude = 0.1
        elif t < 20:
            steering_amplitude = -0.1
        else:
            steering_amplitude = 0
        # Applying asymmetrical sine gaits to different legs steers the minitaur.
        a1 = math.sin(t * speed) * (amplitude1 + steering_amplitude)
        a2 = math.sin(t * speed + math.pi) * (amplitude1 - steering_amplitude)
        a3 = math.sin(t * speed) * amplitude2
        a4 = math.sin(t * speed + math.pi) * amplitude2
        _, reward, done, _ = environment.step([a1, a2, a2, a1, a3, a4, a4, a3])
        sum_reward += reward
        if done:
            break
    environment.reset()
def __init__(self, env_seed,
             env_name='',
             policy_params = None,
             deltas=None,
             rollout_length=1000,
             delta_std=0.02):
    """ARS rollout worker: owns one env instance and a view of the noise table.

    Args:
      env_seed: seed for this worker's environment and noise stream.
      env_name: unused here — the env is hard-coded to MinitaurBulletEnv
        (see the commented-out gym.make below).
      policy_params: policy description dict; only type 'linear' is supported.
      deltas: shared noise buffer (handle from the driver).
      rollout_length: maximum steps per rollout.
      delta_std: std-dev of exploration perturbations.
    """
    # initialize OpenAI environment for each worker
    self.env = minitaur_gym_env.MinitaurBulletEnv()#gym.make(env_name)
    self.env.seed(env_seed)
    print ("env seed: {}".format(env_seed))

    # each worker gets access to the shared noise table
    # with independent random streams for sampling
    # from the shared noise table.
    self.deltas = SharedNoiseTable(deltas, env_seed + 7)
    self.policy_params = policy_params
    # Only linear policies are implemented.
    if policy_params['type'] == 'linear':
        self.policy = LinearPolicy(policy_params)
    else:
        raise NotImplementedError

    self.delta_std = delta_std
    self.rollout_length = rollout_length
def SineStandExample():
    """Minitaur stands and squats on the floor, printing each action.

    Validates the accurate motor model by commanding periodic sit/stand and
    recording actions and observations for comparison against hardware.
    """
    environment = minitaur_gym_env.MinitaurBulletEnv(render=True,
                                                     leg_model_enabled=False,
                                                     motor_velocity_limit=np.inf,
                                                     motor_overheat_protection=True,
                                                     accurate_motor_model_enabled=True,
                                                     motor_kp=1.20,
                                                     motor_kd=0.02,
                                                     on_rack=False)
    steps = 1000
    amplitude = 0.5
    speed = 3
    time_step = 0.01  # matches the internal timestep
    actions_and_observations = []
    for step_counter in range(steps):
        t = step_counter * time_step
        # Sine command around the pi/2 standing pose, identical on all motors.
        action = [math.sin(speed * t) * amplitude + math.pi / 2] * 8
        current_row = [t] + action
        print("Accion: ", action)
        observation, _, _, _ = environment.step(action)
        current_row.extend(observation.tolist())
        actions_and_observations.append(current_row)
    environment.reset()
def run_ars(params):
    """Prepare output dirs, build the Minitaur env, and train an ARS policy.

    Args:
      params: experiment config dict with keys 'dir_path', 'filter',
        'env_name', 'n_workers', 'n_directions', 'deltas_used', 'step_size',
        'delta_std', 'rollout_length', 'shift', 'seed', 'n_iter'.
    """
    dir_path = params['dir_path']
    # FIX: the original checked and created the same path twice (logdir was
    # aliased to dir_path); one idempotent makedirs is equivalent.
    os.makedirs(dir_path, exist_ok=True)
    logdir = dir_path

    env = minitaur_gym_env.MinitaurBulletEnv()  # gym.make(params['env_name'])
    ob_dim = env.observation_space.shape[0]
    ac_dim = env.action_space.shape[0]

    # set policy parameters. Possible filters: 'MeanStdFilter' for v2,
    # 'NoFilter' for v1.
    policy_params = {'type': 'linear',
                     'ob_filter': params['filter'],
                     'ob_dim': ob_dim,
                     'ac_dim': ac_dim}
    ARS = ARSLearner(env_name=params['env_name'],
                     policy_params=policy_params,
                     num_workers=params['n_workers'],
                     num_deltas=params['n_directions'],
                     deltas_used=params['deltas_used'],
                     step_size=params['step_size'],
                     delta_std=params['delta_std'],
                     logdir=logdir,
                     rollout_length=params['rollout_length'],
                     shift=params['shift'],
                     params=params,
                     seed=params['seed'])
    ARS.train(params['n_iter'])
    return
def SinePolicyExample():
    """Walk the minitaur with a sine gait: steer right, then left, then straight."""
    env_randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()
    environment = minitaur_gym_env.MinitaurBulletEnv(render=True,
                                                     motor_velocity_limit=np.inf,
                                                     pd_control_enabled=True,
                                                     hard_reset=False,
                                                     env_randomizer=env_randomizer,
                                                     on_rack=False)
    steps = 20000
    time_step = 0.01
    speed = 10
    amplitude1 = 0.1  # amplitude_1_bound
    amplitude2 = 0.1  # amplitude_2_bound
    sum_reward = 0
    for step_counter in range(steps):
        t = step_counter * time_step
        # Steering schedule: 0.1 for the first 10 s, -0.1 for the next 10 s,
        # then drive straight.
        steering_amplitude = 0.1 if t < 10 else (-0.1 if t < 20 else 0)
        # Asymmetrical sine gaits on different legs steer the minitaur.
        a1 = math.sin(t * speed) * (amplitude1 + steering_amplitude)
        a2 = math.sin(t * speed + math.pi) * (amplitude1 - steering_amplitude)
        a3 = math.sin(t * speed) * amplitude2
        a4 = math.sin(t * speed + math.pi) * amplitude2
        _, reward, done, _ = environment.step([a1, a2, a2, a1, a3, a4, a4, a3])
        sum_reward += reward
        if done:
            break
    environment.reset()
def make_env(env_name, seed=-1, render_mode=False):
    """Build the env for env_name, covering the Augment* robogym variants."""
    if env_name.startswith("RacecarBulletEnv"):
        print("bullet_racecar_started")
        env = racecarGymEnv.RacecarGymEnv(isDiscrete=False, renders=render_mode)
    elif env_name.startswith("AugmentBipedalWalker"):
        # Most-specific prefixes are tested first.
        if env_name.startswith("AugmentBipedalWalkerHardcore"):
            if env_name.startswith("AugmentBipedalWalkerHardcoreSmallLegs"):
                from box2d.bipedal_walker import AugmentBipedalWalkerHardcoreSmallLegs
                env = AugmentBipedalWalkerHardcoreSmallLegs()
            else:
                from box2d.bipedal_walker import AugmentBipedalWalkerHardcore
                env = AugmentBipedalWalkerHardcore()
        elif env_name.startswith("AugmentBipedalWalkerSmallLegs"):
            from box2d.bipedal_walker import AugmentBipedalWalkerSmallLegs
            env = AugmentBipedalWalkerSmallLegs()
        elif env_name.startswith("AugmentBipedalWalkerTallLegs"):
            from box2d.bipedal_walker import AugmentBipedalWalkerTallLegs
            env = AugmentBipedalWalkerTallLegs()
        else:
            from box2d.bipedal_walker import AugmentBipedalWalker
            env = AugmentBipedalWalker()
    elif env_name.startswith("MinitaurBulletEnv"):
        print("bullet_minitaur_started")
        env = minitaur_gym_env.MinitaurBulletEnv(render=render_mode)
    elif env_name.startswith("MinitaurDuckBulletEnv"):
        print("bullet_minitaur_duck_started")
        env = MinitaurDuckBulletEnv(render=render_mode)
    elif env_name.startswith("MinitaurBallBulletEnv"):
        print("bullet_minitaur_ball_started")
        env = MinitaurBallBulletEnv(render=render_mode)
    elif env_name.startswith("KukaBulletEnv"):
        print("bullet_kuka_grasping started")
        env = kukaGymEnv.KukaGymEnv(renders=render_mode, isDiscrete=False)
    else:
        if env_name.startswith("Augment"):
            import robogym  # registers the Augment* mujoco variants
        if env_name.startswith("AugmentAnt"):
            from robogym import AugmentAnt
            env = AugmentAnt()
        elif env_name.startswith("AugmentHopper"):
            from robogym import AugmentHopper
            env = AugmentHopper()
        elif env_name.startswith("AugmentHalfCheetah"):
            from robogym import AugmentHalfCheetah
            env = AugmentHalfCheetah()
        else:
            env = gym.make(env_name)
        if render_mode and not env_name.startswith("Augment"):
            env.render("human")
    if seed >= 0:
        env.seed(seed)
    return env
def make_env(env_name, seed=-1, render_mode=False):
    """Build the env for env_name, importing heavyweight deps lazily per branch."""
    if env_name.startswith("RacecarBulletEnv"):
        import pybullet as p
        import pybullet_envs
        import pybullet_envs.bullet.racecarGymEnv as racecarGymEnv
        print("bullet_racecar_started")
        env = racecarGymEnv.RacecarGymEnv(isDiscrete=False, renders=render_mode)
    elif env_name.startswith("RocketLander"):
        from box2d.rocket import RocketLander
        env = RocketLander()
    elif env_name.startswith("BipedalWalker"):
        if env_name.startswith("BipedalWalkerHardcore"):
            from box2d.biped import BipedalWalkerHardcore
            env = BipedalWalkerHardcore()
        else:
            from box2d.biped import BipedalWalker
            env = BipedalWalker()
    elif env_name.startswith("MinitaurBulletEnv"):
        import pybullet as p
        import pybullet_envs
        import pybullet_envs.bullet.minitaur_gym_env as minitaur_gym_env
        print("bullet_minitaur_started")
        env = minitaur_gym_env.MinitaurBulletEnv(render=render_mode)
    elif env_name.startswith("MinitaurDuckBulletEnv"):
        print("bullet_minitaur_duck_started")
        import pybullet as p
        import pybullet_envs
        from custom_envs.minitaur_duck import MinitaurDuckBulletEnv
        env = MinitaurDuckBulletEnv(render=render_mode)
    elif env_name.startswith("MinitaurBallBulletEnv"):
        print("bullet_minitaur_ball_started")
        import pybullet as p
        import pybullet_envs
        from custom_envs.minitaur_ball import MinitaurBallBulletEnv
        env = MinitaurBallBulletEnv(render=render_mode)
    elif env_name.startswith("SlimeVolley"):
        print("slimevolley_swingup_started")
        from custom_envs.slimevolley import SlimeVolleyEnv, SurvivalRewardEnv
        env = SlimeVolleyEnv()
        env = SurvivalRewardEnv(env)  # optional
    elif env_name.startswith("CartPoleSwingUp"):
        print("cartpole_swingup_started")
        from custom_envs.cartpole_swingup import CartPoleSwingUpEnv
        env = CartPoleSwingUpEnv()
    elif env_name.startswith("KukaBulletEnv"):
        import pybullet as p
        import pybullet_envs
        import pybullet_envs.bullet.kukaGymEnv as kukaGymEnv
        print("bullet_kuka_grasping started")
        env = kukaGymEnv.KukaGymEnv(renders=render_mode, isDiscrete=False)
    elif env_name.startswith("NoDeathAntBullet"):
        print("no death bullet ant.")
        import pybullet_envs
        from custom_envs.custom_wrapper import NoDeath
        env = NoDeath(gym.make("AntBulletEnv-v0"))
        if render_mode and not env_name.startswith("Roboschool"):
            env.render("human")
    else:
        if env_name.startswith("Roboschool"):
            import roboschool
        if "Bullet" in env_name:
            import pybullet_envs
        env = gym.make(env_name)
        if render_mode and not env_name.startswith("Roboschool"):
            env.render("human")
    if seed >= 0:
        env.seed(seed)
    return env
# Train PPO2 on the headless Minitaur bullet env and save the resulting model.
import pybullet_envs.bullet.minitaur_gym_env as e
from stable_baselines import PPO2
from stable_baselines.common.policies import MlpPolicy

total_timesteps = 1000000

# Rendering off: training runs much faster headless.
minitaur_env = e.MinitaurBulletEnv(render=False)
model = PPO2(MlpPolicy, env=minitaur_env, verbose=1,
             tensorboard_log="./tensorboard")
model.learn(total_timesteps=total_timesteps)
model.save("./model/model{}".format(total_timesteps))
print("model training done!")
import sys
sys.path.insert(0, '../..')

import libs_agents
import numpy
import time
import gym
from pybullet_envs.bullet import minitaur_gym_env
from pybullet_envs.bullet import minitaur_env_randomizer

# Randomize the minitaur dynamics on every reset.
randomizer = minitaur_env_randomizer.MinitaurEnvRandomizer()

env = minitaur_gym_env.MinitaurBulletEnv(render=True,
                                         leg_model_enabled=False,
                                         motor_velocity_limit=numpy.inf,
                                         pd_control_enabled=True,
                                         accurate_motor_model_enabled=True,
                                         motor_overheat_protection=True,
                                         env_randomizer=randomizer,
                                         hard_reset=False)


class Wrapper(gym.Wrapper):
    """Rescales agent actions into motor angles around the standing pose."""

    def __init__(self, env):
        gym.Wrapper.__init__(self, env)

    def step(self, action):
        # Map the raw action into a narrow band centered on pi/2.
        scaled_action = numpy.pi * (0.1 * action + 0.5)
        return self.env.step(scaled_action)


env = Wrapper(env)
import gym
# got this from
# https://usermanual.wiki/Document/pybullet20quickstart20guide.479068914/help
# BUG FIX: this import was commented out even though `e` is used on the next
# statement (NameError at module load).
import pybullet_envs.bullet.minitaur_gym_env as e

env = e.MinitaurBulletEnv(render=True)


class MultiEnv(gym.Env):
    """Gym env that delegates to a per-worker env chosen from the RLlib config."""

    def __init__(self, env_config):
        # pick actual env based on worker and env indexes
        # NOTE(review): choose_env_for is not defined in this file — confirm
        # it is provided elsewhere before running.
        self.env = gym.make(
            choose_env_for(env_config.worker_index, env_config.vector_index))
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space

    def reset(self):
        return self.env.reset()

    def step(self, action):
        return self.env.step(action)


# NOTE(review): register_env / ppo are not imported here — presumably from
# ray.tune.registry and ray.rllib.agents; confirm.
register_env("multienv", lambda config: MultiEnv(config))
# BUG FIX: the env was registered as "multienv" but the trainer requested
# "my_env"; use the registered name so RLlib can resolve it.
trainer = ppo.PPOTrainer(env="multienv")