def main(argv):
    """Drive the policy-gradient learner with random actions, forever.

    Builds the environment, the agent proxy and the learner objects, starts
    the environment, then loops: a random move and a random attack are each
    passed through ``rl.action_space_down_sample`` and ``net.steps_action``,
    and the two results are handed to ``policy_gradient.learn``. The done
    flags returned by ``learn`` select which environment transition to
    request.

    Args:
        argv: Command-line arguments; not read by this function.

    The loop has no exit condition, so this function does not return
    normally.
    """
    # The same literal is passed to the builder and to the proxy; what it
    # measures is not visible from this function.
    dim = 100800

    roms_path = "roms/"
    env = Environment("env1", roms_path)
    policy_gradient = PolicyGradientBuilder(dim, dim, False)
    rl = AgentProxy(env, dim)
    # Never read afterwards; kept bound so the object lives as long as main().
    dqn = DQNFlyweight(agent=rl)
    net = ReinforcementLearning(rl)
    env.start()

    while True:
        move_action = random.randint(0, 8)
        steps_move_action = net.steps_action(
            rl.action_space_down_sample(move_action))

        attack_action = random.randint(0, 9)
        steps_attack_action = net.steps_action(
            rl.action_space_down_sample(attack_action))

        # env.step() is not called here; the learner supplies the same
        # five-element result.
        outcome = policy_gradient.learn(steps_move_action, steps_attack_action)
        _frames, _reward, round_done, stage_done, game_done = outcome

        # Most severe condition first; at most one transition per iteration.
        if game_done:
            env.new_game()
            continue
        if stage_done:
            env.next_stage()
            continue
        if round_done:
            env.next_round()
def run_env(worker_id, roms_path):
    """Run one environment with uniformly random actions, forever.

    Args:
        worker_id: Value interpolated into the environment id
            (``f"env{worker_id}"``).
        roms_path: ROM location forwarded unchanged to ``Environment``.

    The loop has no exit condition, so this function does not return
    normally.
    """
    env = Environment(f"env{worker_id}", roms_path)
    env.start()

    while True:
        chosen_move = random.randint(0, 8)
        chosen_attack = random.randint(0, 9)
        step_result = env.step(chosen_move, chosen_attack)
        _frames, _reward, round_done, stage_done, game_done = step_result

        # Most severe condition first; at most one transition per iteration.
        if game_done:
            env.new_game()
            continue
        if stage_done:
            env.next_stage()
            continue
        if round_done:
            env.next_round()
class StreetFighterEnv(object):
    """Expose an ``Environment`` through a single-integer action interface.

    One action integer encodes both inputs: ``action // 10`` is the move
    action and ``action % 10`` is the attack action.
    """

    # Shape of the all-zero placeholder observation used whenever a round,
    # stage or game has just ended.
    _BLANK_SHAPE = (1, 3, 168, 168)

    def __init__(self, index, monitor=None,
                 roms_path="/home/zhangchao/Downloads/"):
        """Create and start the underlying environment.

        Args:
            index: Value formatted into the environment id (``"env{index}"``).
            monitor: Optional object with a ``record(frame)`` method. Any
                falsy value is stored as ``None``.
            roms_path: ROM directory passed to ``Environment``. The default
                is the previously hard-coded location, kept so existing
                callers behave the same; pass your own path on other
                machines.
        """
        self.env = Environment("env{}".format(index), roms_path)
        self.monitor = monitor if monitor else None
        self.env.start()

    @classmethod
    def _blank_frames(cls):
        """Return a fresh all-zero float32 placeholder observation."""
        return np.zeros(cls._BLANK_SHAPE, dtype=np.float32)

    def step(self, action):
        """Advance the environment by one step.

        Args:
            action: Combined action; split into ``action // 10`` (move) and
                ``action % 10`` (attack).

        Returns:
            Tuple ``(frames, reward, round_done, stage_done, game_done)``:
            the observation, the shaped reward and the three done flags
            reported by the environment.
        """
        move_action = action // 10
        attack_action = action % 10
        frames, reward, round_done, stage_done, game_done = self.env.step(
            move_action, attack_action)

        if self.monitor:
            # Original note: record() writes the image data to a child
            # process through a pipe (not verifiable from this class).
            for frame in frames:
                self.monitor.record(frame)

        if round_done or stage_done or game_done:
            # Raw frames are discarded on any "done" step.
            frames = self._blank_frames()
        else:
            # Concatenate the processed frames along axis 0, then add a
            # leading axis of length 1.
            stacked = np.concatenate(
                [process_frame(frame) for frame in frames], 0)
            frames = stacked[None, :, :, :].astype(np.float32)

        # Reward shaping: start from player 1's reward, override it on stage
        # clear (+25) or otherwise on game over (-50), scale it by the current
        # stage number, then divide by 10.
        reward = reward["P1"]
        if stage_done:
            reward = 25
        elif game_done:
            reward = -50
        reward *= (1 + (self.env.stage - 1) / 10)
        reward /= 10
        return frames, reward, round_done, stage_done, game_done

    def reset(self, round_done, stage_done, game_done):
        """Request the environment transition that matches the done flags.

        Only the first true flag is acted on, checked in the order
        ``game_done``, ``stage_done``, ``round_done``; if none is true no
        transition is requested.

        Returns:
            A fresh all-zero float32 placeholder observation.
        """
        if game_done:
            self.env.new_game()
        elif stage_done:
            self.env.next_stage()
        elif round_done:
            self.env.next_round()
        return self._blank_frames()
import random

from MAMEToolkit.sf_environment import Environment

# Random-agent script: start one environment and feed it uniformly random
# move/attack pairs forever.
roms_path = 'rom/'
env = Environment("sfiii3n", roms_path)
env.start()

while True:
    move_action = random.randint(0, 8)
    attack_action = random.randint(0, 9)
    step_result = env.step(move_action, attack_action)
    frames, reward, round_done, stage_done, game_done = step_result

    # Most severe condition first; at most one transition per iteration.
    if game_done:
        env.new_game()
        continue
    if stage_done:
        env.next_stage()
        continue
    if round_done:
        env.next_round()