class StreetFighterEnv(gym.Env):
    """Gym wrapper around a MAMEToolkit Street Fighter environment.

    The observation stacks 3 image channels with a one-hot plane of the
    previous action (action space: 90 = 9 moves x 10 attacks).
    """

    def __init__(self, index, difficulty, monitor=None):
        roms_path = "roms/"
        self.env = Environment("env{}".format(index), roms_path,
                               difficulty=difficulty)
        # Optional frame recorder; None disables recording.
        self.monitor = monitor if monitor else None
        self.env.start()
        self.action_space = gym.spaces.Discrete(90)
        self.observation_space = gym.spaces.Box(
            low=0,
            high=1,
            shape=(3 + self.action_space.n, 128, 128),
            dtype=np.float32)

    def step(self, action):
        """Advance one emulator step; return (state, reward, game_done, info)."""
        # Encoded action = move * 10 + attack.
        move_action, attack_action = divmod(action, 10)
        frames, reward, round_done, stage_done, game_done = self.env.step(
            move_action, attack_action)
        if self.monitor:
            for frame in frames:
                self.monitor.record(frame)
        states = np.zeros(self.observation_space.shape, dtype=np.float32)
        if round_done or stage_done or game_done:
            self.env.reset()
            action = 80  # sentinel action marking an episode boundary
        else:
            states[:3, :] = process_frame(frames[-1])
        # One-hot plane for the action just taken (or the sentinel).
        states[action + 3, :] = 1
        # Shaped reward: scaled P1 reward plus stage/game bonuses.
        reward = reward["P1"] / 10
        if stage_done:
            reward += 3
        elif game_done:
            reward -= 5
        info = {
            'stage_done': stage_done,
            'round_done': round_done,
            'stage': self.env.stage,
        }
        return states, reward, game_done, info

    def reset(self):
        """Start a fresh game and return the initial observation."""
        self.env.new_game()
        states = np.zeros(self.observation_space.shape, dtype=np.float32)
        states[80 + 3, :] = 1  # sentinel-action plane set on reset
        return states

    def __exit__(self, *args):
        return self.env.close()
def main(argv):
    """Drive random move/attack actions through the policy-gradient pipeline.

    Runs forever: samples a random move (0-8) and attack (0-9), maps each
    through the agent proxy and network helpers, learns one step, then
    restarts the game/stage/round as needed.
    """
    roms_path = "roms/"
    env = Environment("env1", roms_path)
    policy_gradient = PolicyGradientBuilder(100800, 100800, False)
    rl = AgentProxy(env, 100800)
    dqn = DQNFlyweight(agent=rl)  # constructed for its side effects on the agent
    net = ReinforcementLearning(rl)
    env.start()
    while True:
        # Sample and translate both action components in move-then-attack order.
        translated = []
        for upper_bound in (8, 9):
            raw_action = random.randint(0, upper_bound)
            proxied = rl.action_space_down_sample(raw_action)
            translated.append(net.steps_action(proxied))
        steps_move_action, steps_attack_action = translated
        frames, reward, round_done, stage_done, game_done = \
            policy_gradient.learn(steps_move_action, steps_attack_action)
        if game_done:
            env.new_game()
        elif stage_done:
            env.next_stage()
        elif round_done:
            env.next_round()
def run_env(worker_id, roms_path):
    """Run one worker's environment forever, stepping with random actions."""
    env = Environment(f"env{worker_id}", roms_path)
    env.start()
    while True:
        # Arguments evaluate left to right: move (0-8) then attack (0-9).
        frames, reward, round_done, stage_done, game_done = env.step(
            random.randint(0, 8), random.randint(0, 9))
        if game_done:
            env.new_game()
        elif stage_done:
            env.next_stage()
        elif round_done:
            env.next_round()
class StreetFighterEnv(object):
    """Plain (non-gym) wrapper around a MAMEToolkit Street Fighter environment."""

    def __init__(self, index, monitor=None):
        roms_path = "/home/zhangchao/Downloads/"
        self.env = Environment("env{}".format(index), roms_path)
        # Optional frame recorder; None disables recording.
        self.monitor = monitor if monitor else None
        self.env.start()

    def step(self, action):
        """Single step: returns frames, shaped reward, and done flags."""
        move_action = action // 10
        attack_action = action % 10
        frames, reward, round_done, stage_done, game_done = self.env.step(
            move_action, attack_action)
        if self.monitor:
            # Stream each frame to the monitor subprocess via its pipe.
            for frame in frames:
                self.monitor.record(frame)
        if round_done or stage_done or game_done:
            # Episode boundary: emit a blank observation.
            frames = np.zeros((1, 3, 168, 168), dtype=np.float32)
        else:
            # Concatenate processed frames along the channel axis.
            frames = np.concatenate(
                [process_frame(frame) for frame in frames], 0
            )[None, :, :, :].astype(np.float32)
        # Reward shaping: fixed bonuses at stage/game end, scaled by stage progress.
        reward = reward["P1"]
        if stage_done:
            reward = 25
        elif game_done:
            reward = -50
        reward *= (1 + (self.env.stage - 1) / 10)
        reward /= 10
        return frames, reward, round_done, stage_done, game_done

    def reset(self, round_done, stage_done, game_done):
        """Restart at the right granularity; return a blank observation."""
        if game_done:
            self.env.new_game()
        elif stage_done:
            self.env.next_stage()
        elif round_done:
            self.env.next_round()
        return np.zeros((1, 3, 168, 168), dtype=np.float32)
import random

from MAMEToolkit.sf_environment import Environment

# Minimal random agent: step the sfiii3n environment forever with
# random move (0-8) and attack (0-9) actions, restarting as needed.
roms_path = 'rom/'
env = Environment("sfiii3n", roms_path)
env.start()
while True:
    move_action = random.randint(0, 8)
    attack_action = random.randint(0, 9)
    step_result = env.step(move_action, attack_action)
    frames, reward, round_done, stage_done, game_done = step_result
    if game_done:
        env.new_game()
    elif stage_done:
        env.next_stage()
    elif round_done:
        env.next_round()
class MacroStreetFighterEnv(gym.Env):
    """Gym wrapper adding macro combo actions to the 18 basic SF actions.

    Actions 0-8 are moves, 9-17 are attacks, and 18..18+MACRO_NUMS-1 are
    pre-defined combos executed over multiple emulator frames.
    """

    def __init__(self, index, difficulty, monitor=None):
        roms_path = "roms/"
        self.env = Environment("env{}".format(index), roms_path,
                               difficulty=difficulty)
        # Optional frame recorder; None disables recording.
        self.monitor = monitor if monitor else None
        self.env.start()
        self.action_space = gym.spaces.Discrete(18 + MACRO_NUMS)
        self.observation_space = gym.spaces.Box(
            low=0,
            high=1,
            shape=(3 + self.action_space.n, 128, 128),
            dtype=np.float32)

    def step(self, action):
        """Advance one (possibly multi-frame) step; gym-style return."""
        frames, reward, round_done, stage_done, game_done = self.step_(action)
        if self.monitor:
            for frame in frames:
                self.monitor.record(frame)
        states = np.zeros(self.observation_space.shape, dtype=np.float32)
        if round_done or stage_done or game_done:
            self.env.reset()
            action = 8  # sentinel action marking an episode boundary
        else:
            states[:3, :] = process_frame(frames[-1])
        # One-hot plane for the action just taken (or the sentinel).
        states[action + 3, :] = 1
        reward = reward["P1"] / 10
        if stage_done:
            reward += 3
        elif game_done:
            reward -= 5
        info = {
            'stage_done': stage_done,
            'round_done': round_done,
            'stage': self.env.stage,
        }
        return states, reward, game_done, info

    def step_(self, action):
        """Dispatch an action index to move/attack/macro execution."""
        if not self.env.started:
            raise EnvironmentError("Start must be called before stepping")
        if self.env.round_done or self.env.stage_done or self.env.game_done:
            raise EnvironmentError(
                "Attempted to step while characters are not fighting")
        if action < 9:
            actions = index_to_move_action(action)
        elif action < 18:
            actions = index_to_attack_action(action - 9)
        elif action < 18 + MACRO_NUMS:
            actions = index_to_comb[action - 18]()
        else:
            raise EnvironmentError("Action out of range")
        # Basic actions run through the normal frame gatherer; macros
        # need the custom multi-frame sub-step.
        if action < 18:
            data = self.env.gather_frames(actions)
        else:
            data = self.sub_step_(actions)
        data = self.env.check_done(data)
        return (data["frame"], data["rewards"], self.env.round_done,
                self.env.stage_done, self.env.game_done)

    def sub_step_(self, actions):
        """Execute a macro: hold each input set, collecting every frame."""
        frames = []
        for macro_step in actions:
            held_inputs = [a.value for a in macro_step["actions"]]
            for _ in range(macro_step["hold"]):
                data = self.env.emu.step(held_inputs)
                frames.append(data['frame'])
            # Neutral step after each held input — presumably releases the
            # buttons between macro steps (NOTE(review): nesting reconstructed
            # from mangled source; confirm against the original file).
            data = self.env.emu.step([])
            frames.append(data['frame'])
        # Health deltas since the last expected reading drive the reward.
        p1_diff = (self.env.expected_health["P1"] - data["healthP1"])
        p2_diff = (self.env.expected_health["P2"] - data["healthP2"])
        self.env.expected_health = {
            "P1": data["healthP1"],
            "P2": data["healthP2"],
        }
        data["rewards"] = {"P1": (p2_diff - p1_diff),
                           "P2": (p1_diff - p2_diff)}
        data["frame"] = frames
        return data

    def reset(self):
        """Start a fresh game and return the initial observation."""
        self.env.new_game()
        states = np.zeros(self.observation_space.shape, dtype=np.float32)
        states[8 + 3, :] = 1  # sentinel-action plane set on reset
        return states

    def __exit__(self, *args):
        return self.env.close()