def global_step(self):
    """Advance the game by one frame and update bounce, timeout and scores.

    In order, this method:

    1. Increments ``frame`` and, when ``multiplayer`` is false, points the
       ``p1x`` / ``p1y`` servos at ``trainer_x`` / ``trainer_y``.
    2. Runs ``Scene.global_step`` and reads the ball position and velocity
       on both axes (``ball_x`` / ``ball_y`` are stored on ``self``).
    3. Negates the ball's y velocity when ``|ball_y| > 1.0`` and the ball
       is still moving further out.
    4. When the x velocity has the opposite sign to ``timeout_dir``:
       credits a small speed-proportional bonus to one score, flips
       ``timeout_dir``, resets ``timeout`` to 150 and counts a bounce.
       Otherwise ``timeout`` is decremented.
    5. Restarts from the centre when ``timeout`` hits 0, or when
       ``|ball_x| > 1.65`` (in which case the scores are adjusted first).
    """
    self.frame += 1

    if not self.multiplayer:
        # Trainer
        servo_args = (1.0, 0.02, 0.02, 4)
        self.p1x.set_servo_target(self.trainer_x, *servo_args)
        self.p1y.set_servo_target(self.trainer_y, *servo_args)

    Scene.global_step(self)

    self.ball_x, vel_x = self.ballx.current_position()
    self.ball_y, vel_y = self.bally.current_position()

    # Reflect only while the ball is beyond the limit AND still heading out,
    # so an already-reflected ball is not flipped again on the next frame.
    if np.abs(self.ball_y) > 1.0 and self.ball_y * vel_y > 0:
        self.bally.reset_current_position(self.ball_y, -vel_y)

    if vel_x * self.timeout_dir < 0:
        # hint for early learning: hit the ball!
        hit_bonus = 0.01 * np.abs(vel_x)
        if self.timeout_dir < 0:
            self.score_left += hit_bonus
        else:
            self.score_right += hit_bonus
        self.timeout_dir *= -1
        self.timeout = 150
        self.bounce_n += 1
    else:
        self.timeout -= 1

    if self.timeout == 0:
        # send ball in same dir on timeout
        self.restart_from_center(self.players_count == 1 or vel_x < 0)
    elif np.abs(self.ball_x) > 1.65:
        if vel_x > 0:
            # The +1 is only granted after at least one bounce; the -1 on
            # the other side is applied unconditionally.
            if self.bounce_n > 0:
                self.score_left += 1
            self.score_right -= 1
        else:
            if self.bounce_n > 0:
                self.score_right += 1.0
            self.score_left -= 1
        # winning streak, let it hit more
        self.restart_from_center(self.players_count == 1 or vel_x > 0)
def global_step(self):
    """Advance the game by one frame and track bounces.

    Increments ``frame``, runs ``Scene.global_step``, stores the ball
    position in ``ball_x`` / ``ball_y``, negates the ball's y velocity when
    ``|ball_y| > 1.0`` and it is still moving further out, and, when the x
    velocity has the opposite sign to ``timeout_dir``, flips
    ``timeout_dir``, resets ``timeout`` to ``TIMEOUT`` and counts a bounce.

    NOTE(review): this variant neither decrements ``timeout`` nor performs
    any scoring or restart; confirm that is handled elsewhere.
    """
    self.frame += 1

    # The scripted trainer (servo targets for p1x / p1y when not in
    # multiplayer mode) is commented out in this variant.

    Scene.global_step(self)

    self.ball_x, vel_x = self.ballx.current_position()
    self.ball_y, vel_y = self.bally.current_position()

    # Reflect only while the ball is beyond the limit AND still heading out.
    if np.abs(self.ball_y) > 1.0 and self.ball_y * vel_y > 0:
        self.bally.reset_current_position(self.ball_y, -vel_y)

    # Nothing more to do unless the x velocity now opposes timeout_dir.
    if not (vel_x * self.timeout_dir < 0):
        return

    # The speed-proportional "hit the ball" score hint is commented out in
    # this variant (its factor had been set to 0.00).
    self.timeout_dir *= -1
    self.timeout = self.TIMEOUT
    self.bounce_n += 1
def global_step(self):
    """Advance the game by one frame with a fixed per-rally timeout.

    Increments ``frame``, runs ``Scene.global_step``, stores the ball
    position in ``ball_x`` / ``ball_y`` and negates the ball's y velocity
    when ``|ball_y| > 1.0`` and it is still moving further out. When the x
    velocity has the opposite sign to ``timeout_dir``, ``timeout_dir`` is
    flipped and a bounce is counted; the timeout is *not* refreshed on a
    bounce in this variant.

    The rally ends when ``timeout`` reaches 0 or ``|ball_x| > 1.65``: the
    game restarts from the centre (crediting ``score_left`` or
    ``score_right`` in the out-of-bounds case) and ``timeout`` is reset to
    ``TIMEOUT``. On every other frame ``timeout`` is decremented.
    """
    self.frame += 1

    Scene.global_step(self)

    self.ball_x, vel_x = self.ballx.current_position()
    self.ball_y, vel_y = self.bally.current_position()

    # Reflect only while the ball is beyond the limit AND still heading out.
    if np.abs(self.ball_y) > 1.0 and self.ball_y * vel_y > 0:
        self.bally.reset_current_position(self.ball_y, -vel_y)

    if vel_x * self.timeout_dir < 0:
        # The score hint and the timeout refresh on bounce are commented
        # out in this variant; only the direction flip and counter remain.
        self.timeout_dir *= -1
        self.bounce_n += 1
        # print("bounce", self.bounce_n)

    timed_out = self.timeout == 0
    if not timed_out and not (np.abs(self.ball_x) > 1.65):
        # Rally still in progress: just tick the clock.
        self.timeout -= 1
        return

    if timed_out:
        # send ball in same dir on timeout
        self.restart_from_center(vel_x < 0)
    else:
        if vel_x > 0:
            self.score_left += 1
        else:
            self.score_right += 1.0
        # winning streak, let it hit more
        self.restart_from_center(vel_x > 0)

    # NOTE(review): the reset is applied after every restart branch (timeout
    # and both out-of-bounds cases) - confirm that is the intended scope.
    self.timeout = self.TIMEOUT
def global_step(self):
    """Advance the game by one frame and apply the extended scoring rules.

    In order, this method:

    1. Increments ``frame`` and, when ``multiplayer`` is false, points the
       ``p1x`` / ``p1y`` servos at ``trainer_x`` / ``trainer_y``.
    2. Runs ``Scene.global_step`` and reads the ball position and velocity
       (``ball_x`` / ``ball_y`` are stored on ``self``), negating the y
       velocity when ``|ball_y| > 1.0`` and the ball is still moving out.
    3. When the x velocity has the opposite sign to ``timeout_dir``:
       credits a small speed-proportional bonus, flips ``timeout_dir``,
       resets ``timeout`` to ``TIMEOUT``, counts the bounce and updates the
       consecutive per-side counters ``bounce_n_right`` /
       ``bounce_n_left`` based on the sign of ``ball_x``. Otherwise
       ``timeout`` is decremented. ``episode_time`` is decremented on
       every frame.
    4. Penalises a side whose consecutive bounce counter exceeds 1 and
       restarts from the centre.
    5. Resolves the rally on timeout (``timeout == 0``), on
       ``|ball_x| > 1.65``, or when ``episode_time`` drops below 0,
       updating ``score_left`` / ``score_right`` and the matching
       ``score_board`` counter before restarting from the centre.

    NOTE(review): after a double-hit restart in step 4 the checks of step 5
    still run in the same frame using the velocity read in step 2; confirm
    that a second penalty/restart in one frame cannot occur or is intended.
    """
    self.frame += 1

    if not self.multiplayer:
        # Trainer
        servo_args = (0.02, 0.02, 4)
        self.p1x.set_servo_target(self.trainer_x, *servo_args)
        self.p1y.set_servo_target(self.trainer_y, *servo_args)

    Scene.global_step(self)

    self.ball_x, vel_x = self.ballx.current_position()
    self.ball_y, vel_y = self.bally.current_position()

    # Reflect only while the ball is beyond the limit AND still heading out.
    if np.abs(self.ball_y) > 1.0 and self.ball_y * vel_y > 0:
        self.bally.reset_current_position(self.ball_y, -vel_y)

    # Added by xian: give different scores according to the state of Pong.
    # The following if / else branches were adapted for that purpose.
    if vel_x * self.timeout_dir < 0:
        # hint for early learning: hit the ball!
        hit_bonus = 0.01 * np.abs(vel_x)
        if self.timeout_dir < 0:
            self.score_left += hit_bonus
        else:
            self.score_right += hit_bonus
        self.timeout_dir *= -1
        self.timeout = self.TIMEOUT  # hua-todo
        self.bounce_n += 1
        if self.ball_x > 0:  # hua-todo
            self.bounce_n_right += 1
            self.bounce_n_left = 0
        else:
            self.bounce_n_left += 1
            self.bounce_n_right = 0
    else:
        self.timeout -= 1
    self.episode_time -= 1

    # More than one consecutive bounce on the same side is penalised.
    if self.bounce_n_right > 1:
        # self.score_left += 1
        self.score_right -= 1
        self.score_board['left']['oppo_double_hit'] += 1
        self.restart_from_center(self.players_count == 1 or vel_x > 0)
    elif self.bounce_n_left > 1:
        self.score_left -= 1
        self.score_right += 1
        self.score_board['right']['oppo_double_hit'] += 1
        self.restart_from_center(self.players_count == 1 or vel_x > 0)

    if self.timeout == 0:
        # A timeout with |ball_x| < 1 is booked as a "slow ball", otherwise
        # as a "missed catch"; which side is credited depends on
        # timeout_dir and is mirrored between the two cases.
        near_centre = np.abs(self.ball_x) < 1
        reason = 'oppo_slow_ball' if near_centre else 'oppo_miss_catch'
        dir_negative = self.timeout_dir < 0
        left_scores = dir_negative if near_centre else not dir_negative
        if left_scores:
            self.score_left += 1
            self.score_right -= 1
            self.score_board['left'][reason] += 1
        else:
            self.score_left -= 1
            self.score_right += 1
            self.score_board['right'][reason] += 1
        # send ball in same dir on timeout
        self.restart_from_center(self.players_count == 1 or vel_x < 0)
    elif np.abs(self.ball_x) > 1.65:
        if vel_x > 0:
            # The +1 is only granted after at least one bounce; without a
            # bounce only the "miss_start" counter moves. The -1 on the
            # other side is applied either way.
            if self.bounce_n > 0:
                self.score_left += 1
                self.score_board['left']['oppo_miss_catch'] += 1
            else:
                self.score_board['left']['oppo_miss_start'] += 1
            self.score_right -= 1
        else:
            if self.bounce_n > 0:
                self.score_right += 1.0
                self.score_board['right']['oppo_miss_catch'] += 1
            else:
                self.score_board['right']['oppo_miss_start'] += 1
            self.score_left -= 1
        # winning streak, let it hit more
        self.restart_from_center(self.players_count == 1 or vel_x > 0)
    elif self.episode_time < 0:
        self.score_left -= 1
        # NOTE(review): every other score_board counter is incremented;
        # confirm that decrementing 'not_finish' is intended.
        self.score_board['left']['not_finish'] -= 1
        self.restart_from_center(self.players_count == 1 or vel_x < 0)