Exemplo n.º 1
0
class StreetFighterEnv(gym.Env):
    """Gym wrapper around a MAMEToolkit Street Fighter emulator instance.

    The discrete action encodes a (move, attack) pair as move*10 + attack.
    Observations are (3 + n_actions, 128, 128) float32 planes: three image
    channels plus one-hot planes marking the action just taken.
    """

    def __init__(self, index, difficulty, monitor=None):
        # index names the emulator instance so several envs can run in parallel;
        # monitor, when given, receives every raw frame for recording.
        roms_path = "roms/"
        self.env = Environment("env{}".format(index),
                               roms_path,
                               difficulty=difficulty)
        self.monitor = monitor if monitor else None
        self.env.start()

        self.action_space = gym.spaces.Discrete(90)
        self.observation_space = gym.spaces.Box(low=0,
                                                high=1,
                                                shape=(3 + self.action_space.n,
                                                       128, 128),
                                                dtype=np.float32)

    def step(self, action):
        """Execute one combined action; return (state, reward, done, info)."""
        frames, reward, round_done, stage_done, game_done = self.env.step(
            action // 10, action % 10)

        if self.monitor:
            for frame in frames:
                self.monitor.record(frame)

        state = np.zeros(self.observation_space.shape, dtype=np.float32)

        episode_boundary = round_done or stage_done or game_done
        if episode_boundary:
            # Restart the fight and record sentinel action 80 in the one-hot
            # planes (presumably the neutral pair — confirm against env.step).
            self.env.reset()
            action = 80
        else:
            state[:3, :] = process_frame(frames[-1])
        state[action + 3, :] = 1

        # Scaled P1 reward with a stage-clear bonus and game-over penalty.
        shaped = reward["P1"] / 10
        if stage_done:
            shaped += 3
        elif game_done:
            shaped -= 5

        info = {
            'stage_done': stage_done,
            'round_done': round_done,
            'stage': self.env.stage
        }
        return state, shaped, game_done, info

    def reset(self):
        """Begin a new game and return a blank observation with action 80 marked."""
        self.env.new_game()
        state = np.zeros(self.observation_space.shape, dtype=np.float32)
        state[80 + 3, :] = 1
        return state

    def __exit__(self, *args):
        return self.env.close()
Exemplo n.º 2
0
def main(argv):
    """Drive a random agent through the RL wrapper pipeline indefinitely."""
    roms_path = "roms/"
    env = Environment("env1", roms_path)

    policy_gradient = PolicyGradientBuilder(100800, 100800, False)

    rl = AgentProxy(env, 100800)
    dqn = DQNFlyweight(agent=rl)  # constructed for its side effects on the agent
    net = ReinforcementLearning(rl)

    env.start()
    while True:
        # Sample a random move, then a random attack, preserving call order.
        move_idx = random.randint(0, 8)
        move_steps = net.steps_action(rl.action_space_down_sample(move_idx))
        attack_idx = random.randint(0, 9)
        attack_steps = net.steps_action(rl.action_space_down_sample(attack_idx))
        (frames, reward,
         round_done, stage_done,
         game_done) = policy_gradient.learn(move_steps, attack_steps)
        if game_done:
            env.new_game()
        elif stage_done:
            env.next_stage()
        elif round_done:
            env.next_round()
Exemplo n.º 3
0
def run_env(worker_id, roms_path):
    """Run a never-ending random-action loop against one emulator instance."""
    env = Environment(f"env{worker_id}", roms_path)
    env.start()
    while True:
        # Arguments evaluate left to right: move sample first, then attack.
        frames, reward, round_done, stage_done, game_done = env.step(
            random.randint(0, 8), random.randint(0, 9))
        if game_done:
            env.new_game()
        elif stage_done:
            env.next_stage()
        elif round_done:
            env.next_round()
Exemplo n.º 4
0
class StreetFighterEnv(object):
    """Street Fighter wrapper that returns stacked, preprocessed frame tensors."""

    def __init__(self, index, monitor=None):
        # index names the emulator instance; monitor optionally records frames.
        roms_path = "/home/zhangchao/Downloads/"
        self.env = Environment("env{}".format(index), roms_path)
        self.monitor = monitor if monitor else None
        self.env.start()

    def step(self, action):
        """Advance one step; return (frames, reward, round/stage/game done flags).

        The discrete action encodes (move, attack) as move*10 + attack.
        """
        frames, reward, round_done, stage_done, game_done = self.env.step(
            action // 10, action % 10)

        if self.monitor:
            # Push each raw frame to the recording subprocess via its pipe.
            for frame in frames:
                self.monitor.record(frame)

        if round_done or stage_done or game_done:
            frames = np.zeros((1, 3, 168, 168), dtype=np.float32)
        else:
            # Stack the preprocessed frames along the channel axis.
            processed = [process_frame(frame) for frame in frames]
            frames = np.concatenate(processed, 0)[None, :, :, :].astype(np.float32)

        # Reward shaping: stage clear / game over override the raw P1 reward,
        # then scale up with stage number and normalize.
        shaped = reward["P1"]
        if stage_done:
            shaped = 25
        elif game_done:
            shaped = -50
        shaped *= (1 + (self.env.stage - 1) / 10)
        shaped /= 10

        return frames, shaped, round_done, stage_done, game_done

    def reset(self, round_done, stage_done, game_done):
        """Restart at the appropriate granularity and return a blank observation."""
        if game_done:
            self.env.new_game()
        elif stage_done:
            self.env.next_stage()
        elif round_done:
            self.env.next_round()
        return np.zeros((1, 3, 168, 168), dtype=np.float32)
Exemplo n.º 5
0
import random
from MAMEToolkit.sf_environment import Environment

roms_path = 'rom/'

env = Environment("sfiii3n", roms_path)

env.start()
# Play forever with uniformly random (move, attack) pairs; arguments evaluate
# left to right, so the move is sampled before the attack each iteration.
while True:
    frames, reward, round_done, stage_done, game_done = env.step(
        random.randint(0, 8), random.randint(0, 9))
    if game_done:
        env.new_game()
    elif stage_done:
        env.next_stage()
    elif round_done:
        env.next_round()
Exemplo n.º 6
0
class MacroStreetFighterEnv(gym.Env):
    """Street Fighter gym environment whose discrete actions mix single moves,
    single attacks, and multi-step macro combos.

    Action layout: 0-8 = moves, 9-17 = attacks, 18..18+MACRO_NUMS-1 = macros.
    Observations are (3 + n_actions, 128, 128) float32 planes: three image
    channels plus one-hot planes marking the action just taken.
    """

    def __init__(self, index, difficulty, monitor=None):
        # index: suffix for the emulator instance name (supports parallel envs).
        # difficulty: forwarded to the MAMEToolkit Environment.
        # monitor: optional frame recorder; receives every raw frame per step.
        roms_path = "roms/"
        self.env = Environment("env{}".format(index),
                               roms_path,
                               difficulty=difficulty)
        if monitor:
            self.monitor = monitor
        else:
            self.monitor = None
        self.env.start()

        # 9 moves + 9 attacks + MACRO_NUMS combo macros.
        self.action_space = gym.spaces.Discrete(18 + MACRO_NUMS)
        self.observation_space = gym.spaces.Box(low=0,
                                                high=1,
                                                shape=(3 + self.action_space.n,
                                                       128, 128),
                                                dtype=np.float32)

    def step(self, action):
        """Run one (possibly multi-frame) action; return (state, reward, done, info)."""
        frames, reward, round_done, stage_done, game_done = self.step_(action)

        if self.monitor:
            for frame in frames:
                self.monitor.record(frame)

        states = np.zeros(self.observation_space.shape, dtype=np.float32)

        if not (round_done or stage_done or game_done):
            states[:3, :] = process_frame(frames[-1])
        else:
            # Episode boundary: restart the fight and record sentinel action 8
            # instead (presumably the neutral move — confirm with index_to_move_action).
            self.env.reset()
            action = 8

        # One-hot plane encoding the action just taken.
        states[action + 3, :] = 1

        # Scaled P1 reward with a stage-clear bonus and game-over penalty.
        reward = reward["P1"] / 10
        if stage_done:
            reward += 3
        elif game_done:
            reward -= 5

        info = {
            'stage_done': stage_done,
            'round_done': round_done,
            'stage': self.env.stage
        }
        return states, reward, game_done, info

    def step_(self, action):
        """Translate the discrete action index into emulator inputs and execute.

        Raises EnvironmentError when the env is not started, when called
        outside an active fight, or when the action index is out of range.
        """
        if self.env.started:
            if not self.env.round_done and not self.env.stage_done and not self.env.game_done:

                # Decode: 0-8 moves, 9-17 attacks, 18+ macro combos.
                if action < 9:
                    actions = index_to_move_action(action)
                elif action < 18:
                    actions = index_to_attack_action(action - 9)
                elif action < 18 + MACRO_NUMS:
                    actions = index_to_comb[action - 18]()
                else:
                    raise EnvironmentError("Action out of range")

                # Primitive actions use the env's own frame gathering; macros
                # need the custom multi-step driver below.
                if action < 18:
                    data = self.env.gather_frames(actions)
                else:
                    data = self.sub_step_(actions)

                data = self.env.check_done(data)
                return data["frame"], data[
                    "rewards"], self.env.round_done, self.env.stage_done, self.env.game_done
            else:
                raise EnvironmentError(
                    "Attempted to step while characters are not fighting")
        else:
            raise EnvironmentError("Start must be called before stepping")

    def sub_step_(self, actions):
        """Play a macro: hold each action set for its duration, collect frames,
        and compute health-delta rewards for both players."""
        frames = []
        for step in actions:
            # Each macro step holds a set of buttons for step["hold"] emulator frames.
            for i in range(step["hold"]):
                data = self.env.emu.step(
                    [action.value for action in step["actions"]])
                frames.append(data['frame'])
        # One final no-input step to capture the closing frame/state.
        data = self.env.emu.step([])
        frames.append(data['frame'])

        # Health lost by each player since the last expected-health snapshot.
        p1_diff = (self.env.expected_health["P1"] - data["healthP1"])
        p2_diff = (self.env.expected_health["P2"] - data["healthP2"])
        self.env.expected_health = {
            "P1": data["healthP1"],
            "P2": data["healthP2"]
        }

        # Zero-sum reward: damage dealt minus damage taken.
        rewards = {"P1": (p2_diff - p1_diff), "P2": (p1_diff - p2_diff)}

        data["rewards"] = rewards
        data["frame"] = frames
        return data

    def reset(self):
        """Start a new game and return a blank observation with action 8 marked."""
        self.env.new_game()

        states = np.zeros(self.observation_space.shape, dtype=np.float32)
        states[8 + 3, :] = 1
        return states

    def __exit__(self, *args):
        # Context-manager exit: shut down the emulator instance.
        return self.env.close()