def episode_restart(self):
    """Rebuild the Pong world for a new episode.

    Calls the base-class restart, prints the previous score pair when the
    two scores sum to more than zero, reloads the Pong MJCF model, binds the
    joints named ``p0x``, ``p0y``, ``p1x``, ``p1y``, ``ballx`` and ``bally``
    to attributes of the same name, loads the field mesh, creates the video
    camera, zeroes the per-episode counters and finally calls
    ``restart_from_center``.
    """
    Scene.episode_restart(self)

    # Report the score of the episode that just ended (only if non-zero sum).
    if self.score_right + self.score_left > 0:
        sys.stdout.write("%i:%i " % (self.score_left, self.score_right))
        sys.stdout.flush()

    model_path = os.path.join(os.path.dirname(__file__), "models_robot/roboschool_pong.xml")
    self.mjcf = self.cpp_world.load_mjcf(model_path)

    debug_dump = 0  # flip to a truthy value to print the robot/part tree
    tracked_joints = ("p0x", "p0y", "p1x", "p1y", "ballx", "bally")
    for robot in self.mjcf:
        if debug_dump:
            print("ROBOT '%s'" % robot.root_part.name)
        for part in robot.parts:
            if debug_dump:
                print("\tPART '%s'" % part.name)
        for joint in robot.joints:
            joint_name = joint.name
            if joint_name in tracked_joints:
                # The attribute name is the joint name itself (self.p0x, ...).
                setattr(self, joint_name, joint)

    # The ball joints get zero motor torque.
    self.ballx.set_motor_torque(0.0)
    self.bally.set_motor_torque(0.0)

    for robot in self.mjcf:
        robot.query_position()

    field_pose = cpp_household.Pose()
    field_pose.set_xyz(0, 0, -0.04)
    field_mesh = os.path.join(os.path.dirname(__file__), "models_outdoor/stadium/pong1.obj")
    self.field = self.cpp_world.load_thingy(field_mesh, field_pose, 1.0, 0, 0xFFFFFF, True)

    self.camera = self.cpp_world.new_camera_free_float(self.VIDEO_W, self.VIDEO_H, "video_camera")
    self.camera_itertia = 0  # attribute name kept as spelled in the original
    self.frame = 0
    self.jstate_for_frame = -1
    self.score_left = 0
    self.score_right = 0

    # Single-player games always pass True; otherwise the flag is a coin flip.
    serve_flag = self.players_count == 1 or self.np_random.randint(2) == 0
    self.restart_from_center(serve_flag)
def episode_restart(self):
    """Reload the stadium mesh and the ground-plane MJCF for a new episode.

    When ``zero_at_running_strip_start_line`` is truthy the stadium is placed
    at (27, 21, 0); otherwise the default-constructed pose is used.
    """
    # Original note: the base restart contains cpp_world.clean_everything().
    Scene.episode_restart(self)

    package_dir = os.path.dirname(__file__)
    stadium_mesh = os.path.join(package_dir, "models_outdoor/stadium/stadium1.obj")
    ground_xml = os.path.join(package_dir, "mujoco_assets/ground_plane.xml")

    pose = cpp_household.Pose()
    if self.zero_at_running_strip_start_line:
        # Original note: see RUN_STARTLINE, RUN_RAD constants.
        pose.set_xyz(27, 21, 0)

    self.stadium = self.cpp_world.load_thingy(stadium_mesh, pose, 1.0, 0, 0xFFFFFF, True)
    self.ground_plane_mjcf = self.cpp_world.load_mjcf(ground_xml)
def episode_restart(self):
    """Rebuild the indoor lab scene for a new episode.

    Loads, in order: the floor mesh, the bordered table, the slope, the ball
    and the frame, then the ground-plane MJCF. The ball and frame poses are
    also stored as their "home" poses, together with the hard-coded ball
    radius and frame dimensions.
    """
    # Original note: the base restart contains cpp_world.clean_everything().
    Scene.episode_restart(self)

    def spawn_urdf(relative_path, pose):
        # Every URDF in this scene is loaded with the same two False flags.
        return self.cpp_world.load_urdf(
            os.path.join(os.path.dirname(__file__), relative_path),
            pose, False, False)

    # Floor. Original note: "scale seems not working".
    floor_pose = cpp_household.Pose()
    floor_pose.set_xyz(0, 0, 1)
    floor_pose.set_rpy(np.pi / 2, 0, 0)
    self.lab_ground = self.cpp_world.load_thingy(
        os.path.join(os.path.dirname(__file__), "models_indoor/floor.obj"),
        floor_pose, 1.0, 0, 0xFFFFFF, True)

    # Table.
    table_pose = cpp_household.Pose()
    table_pose.set_rpy(0, 0, np.pi / 2)
    table_pose.set_xyz(0.7, 0, 0.75)
    self.table = spawn_urdf("models_indoor/lab/bordered_table.urdf", table_pose)

    # Slope.
    slope_pose = cpp_household.Pose()
    slope_pose.set_rpy(0, 0, np.pi / 2)
    slope_pose.set_xyz(0.6, -0.4, 0.8)
    self.slope = spawn_urdf("models_indoor/lab/slope.urdf", slope_pose)

    # Ball: same x/y as the slope, slightly higher z.
    ball_pose = cpp_household.Pose()
    ball_pose.set_xyz(0.6, -0.4, 0.88)
    self.ball = spawn_urdf("models_indoor/lab/ball.urdf", ball_pose)
    self.ball_home_pose = ball_pose
    self.ball_r = 0.025

    # Frame.
    frame_pose = cpp_household.Pose()
    frame_pose.set_xyz(.95, 0.4, 0.88)
    frame_pose.set_rpy(0, 0, np.pi / 2)
    self.frame = spawn_urdf("models_indoor/lab/frame.urdf", frame_pose)
    self.frame_home_pose = frame_pose
    self.frame_width = 0.14
    self.frame_height = 0.07

    self.ground_plane_mjcf = self.cpp_world.load_mjcf(
        os.path.join(os.path.dirname(__file__), "mujoco_assets/ground_plane.xml"))
def __init__(self):
    """Initialise the scene with zeroed scores and an empty score board.

    ``score_board`` holds one counter dict per side. Both sides track
    ``oppo_miss_start``, ``oppo_slow_ball``, ``oppo_miss_catch`` and
    ``oppo_double_hit``; only the left side additionally has ``not_finish``.
    """
    Scene.__init__(self, gravity=9.8, timestep=0.0165 / 4, frame_skip=4)

    self.score_left = 0
    self.score_right = 0
    self.ball_x = 0

    # Per-side event counters (added by xian to record information).
    shared_events = ("oppo_miss_start", "oppo_slow_ball", "oppo_miss_catch", "oppo_double_hit")
    self.score_board = {
        "left": dict.fromkeys(shared_events + ("not_finish",), 0),
        "right": dict.fromkeys(shared_events, 0),
    }
def global_step(self):
    """Advance the Pong scene by one step and update scores.

    Order of operations: bump the frame counter, drive the ``p1x``/``p1y``
    servos when not in multiplayer mode, step the base scene, read the ball
    joints, mirror the ball's y velocity when it is beyond |y| > 1.0 and
    still moving outward, account for a change in x direction, and finally
    restart from the centre when the ball is beyond |x| > 1.65 or the
    timeout counter has reached zero.
    """
    self.frame += 1

    if not self.multiplayer:
        # Trainer
        for servo, target in ((self.p1x, self.trainer_x), (self.p1y, self.trainer_y)):
            servo.set_servo_target(target, 1.0, 0.02, 0.02, 4)

    Scene.global_step(self)

    self.ball_x, ball_vx = self.ballx.current_position()
    self.ball_y, ball_vy = self.bally.current_position()

    # Beyond the y limit and still heading outward: flip the y velocity.
    if np.abs(self.ball_y) > 1.0 and self.ball_y * ball_vy > 0:
        self.bally.reset_current_position(self.ball_y, -ball_vy)

    if ball_vx * self.timeout_dir < 0:
        # Original note: hint for early learning: hit the ball!
        hit_bonus = 0.01 * np.abs(ball_vx)
        if self.timeout_dir < 0:
            self.score_left += hit_bonus
        else:
            self.score_right += hit_bonus
        self.timeout_dir *= -1
        self.timeout = 150
        self.bounce_n += 1
    else:
        self.timeout -= 1

    # Nothing more to do unless the ball left the field or the timeout hit zero.
    if not (np.abs(self.ball_x) > 1.65 or self.timeout == 0):
        return

    if self.timeout == 0:
        # Original note: send ball in same dir on timeout.
        self.restart_from_center(self.players_count == 1 or ball_vx < 0)
        return

    if ball_vx > 0:
        if self.bounce_n > 0:
            self.score_left += 1
        self.score_right -= 1
    else:
        if self.bounce_n > 0:
            self.score_right += 1.0
        self.score_left -= 1

    # Original note (on the ball_vx > 0 branch): winning streak, let it hit more.
    self.restart_from_center(self.players_count == 1 or ball_vx > 0)
def episode_restart(self):
    """Reload the visual terrain (when rendering) and the ground plane.

    With ``render`` truthy, an inclined scene loads the incline grass mesh
    into ``self.hfield`` and a level scene loads the stadium mesh into
    ``self.stadium``. The ground-plane MJCF is loaded regardless of
    ``render``, choosing the incline or level file from ``inclined``.
    """
    Scene.episode_restart(self)

    pose = cpp_household.Pose()
    if self.zero_at_running_strip_start_line:
        pose.set_xyz(27, 21, 0)

    # Visual meshes are only loaded when rendering is enabled.
    if self.render:
        if self.inclined:
            self.hfield = self.cpp_world.load_thingy(
                'assets/incline_grass.obj', pose, 1.0, 0, 0xFFFFFF, True)
        else:
            stadium_mesh = os.path.join(
                os.path.dirname(roboschool.__file__),
                "models_outdoor/stadium/stadium1.obj")
            self.stadium = self.cpp_world.load_thingy(
                stadium_mesh, pose, 1.0, 0, 0xFFFFFF, True)

    # Note: the "assets/..." paths are relative, not joined to a package dir.
    plane_file = "assets/incline_plane.mjcf" if self.inclined else "assets/level_plane.mjcf"
    self.ground_plane_mjcf = self.cpp_world.load_mjcf(plane_file)
def global_step(self):
    """Advance the scene one step and track ball bounces.

    This variant has the trainer servo control and the per-hit shaping
    reward disabled (they were commented out in the original). It steps the
    base scene, reads the ball joints, mirrors the y velocity when the ball
    is beyond |y| > 1.0 and moving outward, and on a change of x direction
    flips ``timeout_dir``, resets ``timeout`` to ``TIMEOUT`` and counts the
    bounce.
    """
    self.frame += 1

    Scene.global_step(self)

    self.ball_x, ball_vx = self.ballx.current_position()
    self.ball_y, ball_vy = self.bally.current_position()

    past_y_limit = np.abs(self.ball_y) > 1.0
    if past_y_limit and self.ball_y * ball_vy > 0:
        # Still heading outward: flip the y velocity.
        self.bally.reset_current_position(self.ball_y, -ball_vy)

    direction_changed = ball_vx * self.timeout_dir < 0
    if direction_changed:
        self.timeout_dir *= -1
        self.timeout = self.TIMEOUT
        self.bounce_n += 1
def global_step(self):
    """Advance the scene one step, count bounces and award points.

    In this variant a change of x direction only flips ``timeout_dir`` and
    increments ``bounce_n``; the timeout is not reset there. The timeout is
    decremented on every step that ends without a restart, and reset to
    ``TIMEOUT`` after any restart.
    """
    self.frame += 1

    Scene.global_step(self)

    self.ball_x, ball_vx = self.ballx.current_position()
    self.ball_y, ball_vy = self.bally.current_position()

    # Beyond the y limit and still heading outward: flip the y velocity.
    if np.abs(self.ball_y) > 1.0 and self.ball_y * ball_vy > 0:
        self.bally.reset_current_position(self.ball_y, -ball_vy)

    if ball_vx * self.timeout_dir < 0:
        self.timeout_dir *= -1
        self.bounce_n += 1

    # No restart this step: just count the timeout down.
    if not (np.abs(self.ball_x) > 1.65 or self.timeout == 0):
        self.timeout -= 1
        return

    if self.timeout == 0:
        # Original note: send ball in same dir on timeout.
        self.restart_from_center(ball_vx < 0)
    elif ball_vx > 0:
        self.score_left += 1
        # Original note: winning streak, let it hit more.
        self.restart_from_center(ball_vx > 0)
    else:
        self.score_right += 1.0
        self.restart_from_center(ball_vx > 0)

    self.timeout = self.TIMEOUT
def __init__(self):
    """Initialise the scene with fixed physics settings and zeroed scores."""
    # The timestep is 0.0165 / 4 with a frame skip of 4.
    Scene.__init__(self, gravity=9.8, timestep=0.0165 / 4, frame_skip=4)
    self.score_left = self.score_right = 0
def global_step(self):
    """Advance the Pong scene one step and update scores and the score board.

    Steps, in order: bump the frame counter, drive the p1x/p1y servos when
    not in multiplayer mode, step the base scene, read the ball joints,
    mirror the y velocity when the ball is beyond |y| > 1.0 and moving
    outward, account for a change in x direction (per-side consecutive hit
    counters), penalise a side that hit twice in a row, then handle
    out-of-field / timeout / episode-time-exhausted restarts.

    NOTE(review): the nesting below was reconstructed from code whose
    indentation had been lost; the places where more than one layout is
    syntactically possible are marked. Confirm against the original file.
    """
    self.frame += 1
    if not self.multiplayer:
        # Trainer
        self.p1x.set_servo_target(self.trainer_x, 0.02, 0.02, 4)
        self.p1y.set_servo_target(self.trainer_y, 0.02, 0.02, 4)
    Scene.global_step(self)
    self.ball_x, ball_vx = self.ballx.current_position()
    self.ball_y, ball_vy = self.bally.current_position()
    # Beyond the y limit and still heading outward: flip the y velocity.
    if np.abs(self.ball_y) > 1.0 and self.ball_y * ball_vy > 0:
        self.bally.reset_current_position(self.ball_y, -ball_vy)
    # Added by xian: give different scores according to the status of Pong.
    # The following if / else branches were adapted for that purpose.
    if ball_vx * self.timeout_dir < 0:
        if self.timeout_dir < 0:
            self.score_left += 0.01 * np.abs(ball_vx)  # hint for early learning: hit the ball!
        else:
            self.score_right += 0.01 * np.abs(ball_vx)
        self.timeout_dir *= -1
        self.timeout = self.TIMEOUT  # hua-todo
        self.bounce_n += 1
        # Track consecutive hits per side; a hit on one side zeroes the other.
        if self.ball_x > 0:  # hua-todo
            self.bounce_n_right += 1
            self.bounce_n_left = 0
        else:
            self.bounce_n_left += 1
            self.bounce_n_right = 0
    else:
        self.timeout -= 1
    # NOTE(review): assumed to run every step (not only in the else branch
    # above) -- confirm.
    self.episode_time -= 1
    # Double hit: the same side registered more than one hit in a row.
    if self.bounce_n_right > 1:
        # (disabled in the original) self.score_left += 1
        self.score_right -= 1
        self.score_board['left']['oppo_double_hit'] += 1
        self.restart_from_center(self.players_count == 1 or ball_vx > 0)
    elif self.bounce_n_left > 1:
        self.score_left -= 1
        self.score_right += 1
        self.score_board['right']['oppo_double_hit'] += 1
        self.restart_from_center(self.players_count == 1 or ball_vx > 0)
    if np.abs(self.ball_x) > 1.65 or self.timeout == 0:
        if self.timeout == 0:
            if np.abs(self.ball_x) < 1:
                # Timed out with |ball_x| < 1: recorded as a "slow ball".
                if self.timeout_dir < 0:
                    self.score_left += 1
                    self.score_right -= 1
                    self.score_board['left']['oppo_slow_ball'] += 1
                else:
                    self.score_left -= 1
                    self.score_right += 1
                    self.score_board['right']['oppo_slow_ball'] += 1
            else:
                # Timed out with |ball_x| >= 1: recorded as a missed catch.
                if self.timeout_dir < 0:
                    self.score_left -= 1
                    self.score_right += 1
                    self.score_board['right']['oppo_miss_catch'] += 1
                else:
                    self.score_left += 1
                    self.score_right -= 1
                    self.score_board['left']['oppo_miss_catch'] += 1
            self.restart_from_center(self.players_count == 1 or ball_vx < 0)  # send ball in same dir on timeout
        elif ball_vx > 0:
            if self.bounce_n > 0:
                self.score_left += 1
                self.score_board['left']['oppo_miss_catch'] += 1
            else:
                self.score_board['left']['oppo_miss_start'] += 1
            # NOTE(review): assumed to apply whether or not a bounce happened
            # (dedented out of the else above) -- confirm.
            self.score_right -= 1
            self.restart_from_center(self.players_count == 1 or ball_vx > 0)  # winning streak, let it hit more
        else:
            if self.bounce_n > 0:
                self.score_right += 1.0
                self.score_board['right']['oppo_miss_catch'] += 1
            else:
                self.score_board['right']['oppo_miss_start'] += 1
            # NOTE(review): same assumption as the mirrored branch above.
            self.score_left -= 1
            self.restart_from_center(self.players_count == 1 or ball_vx > 0)
    elif self.episode_time < 0:
        # Episode time ran out with the ball in the field and no timeout.
        self.score_left -= 1
        # NOTE(review): this is the only score_board counter that is
        # decremented; every other one uses += 1 -- confirm this is intended.
        self.score_board['left']['not_finish'] -= 1
        self.restart_from_center(self.players_count == 1 or ball_vx < 0)