示例#1
0
        def setup_opponents(self):
            """Select the opponent agent and assign seats for the next game.

            Behaviour depends on ``self.opponent_type``:

            * ``'rules'``: the opponent is ``Agent('greedy')``.
            * anything else: the name returned by
              ``get_best_model_name(self.name)`` is compared with
              ``self.best_model_name``; when it differs, that model is loaded
              and appended to ``self.opponent_models``. The opponent is then
              built from that list:

              * ``'random'``: a uniformly random loaded model.
              * ``'best'``: the most recently appended model.
              * ``'mostly_best'``: the most recent model with probability
                0.8, otherwise a uniformly random loaded model.
              * ``'base'``: the first model in the list.

              For any other value ``self.opponent_agent`` is left untouched.

            Afterwards ``self.agent_player_num`` is drawn with
            ``np.random.choice(self.n_players)`` and ``self.agents`` is rebuilt
            with ``None`` at that index and the opponent agent everywhere else.
            """
            if self.opponent_type == 'rules':
                self.opponent_agent = Agent('greedy')
            else:
                # Incremental load: only call load_model when the reported
                # best-model name differs from the one loaded last time.
                best_model_name = get_best_model_name(self.name)
                if self.best_model_name != best_model_name:
                    self.opponent_models.append(
                        load_model(self, best_model_name))
                    self.best_model_name = best_model_name

                def random_model():
                    # randint bounds are inclusive, hence the "- 1".
                    last_index = len(self.opponent_models) - 1
                    return self.opponent_models[random.randint(0, last_index)]

                if self.opponent_type == 'random':
                    self.opponent_agent = Agent('ppo_opponent', random_model())

                elif self.opponent_type == 'best':
                    self.opponent_agent = Agent('ppo_opponent',
                                                self.opponent_models[-1])

                elif self.opponent_type == 'mostly_best':
                    # 80% of the time play the newest model, otherwise fall
                    # back to a uniformly random one from the pool.
                    if random.uniform(0, 1) < 0.8:
                        self.opponent_agent = Agent('ppo_opponent',
                                                    self.opponent_models[-1])
                    else:
                        self.opponent_agent = Agent('ppo_opponent',
                                                    random_model())

                elif self.opponent_type == 'base':
                    self.opponent_agent = Agent('base',
                                                self.opponent_models[0])

            self.agent_player_num = np.random.choice(self.n_players)
            # All opponent seats reference the same Agent object; the seat at
            # agent_player_num is set to None.
            self.agents = [self.opponent_agent] * self.n_players
            self.agents[self.agent_player_num] = None
            try:
                # if self.players is defined on the base environment
                logger.debug(
                    f'Agent plays as Player {self.players[self.agent_player_num].id}'
                )
            except Exception:
                # Best-effort debug output only: a missing or differently
                # shaped self.players must not break setup. Unlike a bare
                # ``except:``, this lets KeyboardInterrupt/SystemExit through.
                pass
示例#2
0
 def __init__(self, env, name="AI_PPO", size=6):
     """Create the player and attach a PPO agent loaded from disk.

     Args:
         env: passed through unchanged to ``load_model``.
         name: forwarded to the parent class constructor.
         size: interpolated into the model file name
             (``best_model{size}.zip``).
     """
     super().__init__(name)
     # Build the file name first, then wrap the loaded model in an Agent
     # registered under the name 'best_model'.
     model_file = f'best_model{size}.zip'
     self.ppo_agent = Agent('best_model', load_model(env, model_file))
示例#3
0
文件: test.py 项目: radoone/SIMPLE
def main(args):
    """Play ``args.games`` games between the configured agents and log rewards.

    Each entry of ``args.agents`` becomes one player:

    * ``'human'``: actions are typed at an interactive prompt.
    * ``'rules'``: ``Agent('rules')``.
    * ``'json'``: actions arrive over a ZeroMQ REP socket bound to
      ``tcp://*:5555``. The socket is created once and shared by every
      ``'json'`` player.
    * ``'base'``: model loaded from ``base.zip``.
    * anything else: model loaded from ``<agent>.zip``.

    Args:
        args: namespace providing ``debug``, ``env_name``, ``verbose``,
            ``manual``, ``seed``, ``recommend``, ``agents``, ``games``,
            ``randomise_players``, ``best``, ``cont`` and ``write_results``.

    Raises:
        Exception: if ``len(args.agents)`` differs from ``env.n_players``.
    """
    logger.configure(config.LOGDIR)

    if args.debug:
        logger.set_level(config.DEBUG)
    else:
        logger.set_level(config.INFO)

    # make environment
    env = get_environment(args.env_name)(verbose=args.verbose,
                                         manual=args.manual)
    env.seed(args.seed)

    total_rewards = {}

    # On the very first 'json' turn nothing is sent before receiving; on
    # every later 'json' turn the game state is sent first.
    first_time = True

    if args.recommend:
        ppo_model = load_model(env, 'best_model.zip')
        ppo_agent = Agent('best_model', ppo_model)
    else:
        ppo_agent = None

    agents = []

    # ZeroMQ endpoint, created lazily by the first 'json' agent.
    context = None
    socket = None

    # load the agents
    if len(args.agents) != env.n_players:
        raise Exception(
            f'{len(args.agents)} players specified but this is a {env.n_players} player game!'
        )

    for agent in args.agents:
        if agent == 'human':
            agent_obj = Agent('human')
        elif agent == 'rules':
            agent_obj = Agent('rules')
        elif agent == 'json':
            if socket is None:
                # Start mq server. Binding more than once to the same port
                # fails, so every 'json' player reuses this socket.
                context = zmq.Context()
                socket = context.socket(zmq.REP)
                socket.bind("tcp://*:5555")
                logger.debug("zaq server start at 5555")
            agent_obj = Agent('json')
        elif agent == 'base':
            base_model = load_model(env, 'base.zip')
            agent_obj = Agent('base', base_model)
        else:
            ppo_model = load_model(env, f'{agent}.zip')
            agent_obj = Agent(agent, ppo_model)
        agents.append(agent_obj)
        total_rewards[agent_obj.id] = 0

    try:
        # play games
        logger.info(f'\nPlaying {args.games} games...')
        for game in range(args.games):
            players = agents[:]

            if args.randomise_players:
                random.shuffle(players)

            obs = env.reset()
            done = False

            for i, p in enumerate(players):
                logger.debug(f'Player {i+1} = {p.name}')

            while not done:

                current_player = players[env.current_player_num]
                env.render()
                logger.debug(f'\nCurrent player name: {current_player.name}')

                if args.recommend and current_player.name in [
                        'human', 'rules', 'json'
                ]:
                    # show recommendation from last loaded model; the value
                    # assigned here is replaced by the player's own action
                    # below.
                    logger.debug(f'\nRecommendation by {ppo_agent.name}:')
                    action = ppo_agent.choose_action(env,
                                                     choose_best_action=True,
                                                     mask_invalid_actions=True)

                # One exclusive chain: exactly one branch decides the action.
                # (Previously 'human' was a separate ``if``, so the typed
                # action was overwritten by the final ``else`` branch.)
                if current_player.name == 'human':
                    action = input('\nPlease choose an action: ')
                    try:
                        # for int actions
                        action = int(action)
                    except (TypeError, ValueError):
                        # for MultiDiscrete action input as list.
                        # SECURITY: eval() executes whatever was typed; kept
                        # for backward compatibility. ast.literal_eval would
                        # be the safe choice if only literals are expected.
                        action = eval(action)
                elif current_player.name == 'json':
                    if not first_time:
                        game_state = {
                            # indices of the non-zero entries of legal_actions
                            "legal_action": [
                                i for i, o in enumerate(env.legal_actions)
                                if o != 0
                            ],
                            "tableCard": env.tableCard.id
                        }

                        socket.send_json(game_state)

                    action = socket.recv_json()
                    first_time = False
                    logger.debug(f'\nReceived {action}')

                    try:
                        # for int actions
                        action = int(action)
                    except (TypeError, ValueError):
                        # for MultiDiscrete action input as list.
                        # SECURITY: this eval() runs on data received from
                        # the network and can execute arbitrary code; kept
                        # for backward compatibility. Note eval() only
                        # accepts strings, so a non-string payload that
                        # int() rejects raises TypeError here.
                        action = eval(action)
                elif current_player.name == 'rules':
                    logger.debug(f'\n{current_player.name} model choices')
                    action = current_player.choose_action(
                        env, choose_best_action=False,
                        mask_invalid_actions=True)
                else:
                    logger.debug(f'\n{current_player.name} model choices')
                    action = current_player.choose_action(
                        env,
                        choose_best_action=args.best,
                        mask_invalid_actions=True)

                obs, reward, done, _ = env.step(action)

                # reward is paired positionally with this game's seat order.
                for r, player in zip(reward, players):
                    total_rewards[player.id] += r
                    player.points += r

                if args.cont:
                    input('Press any key to continue')

            env.render()

            logger.info(f"Played {game + 1} games: {total_rewards}")

            if args.write_results:
                write_results(players, game, args.games, env.turns_taken)

            # Per-game points are reset; total_rewards keeps accumulating.
            for p in players:
                p.points = 0
    finally:
        try:
            env.close()
        finally:
            if socket is not None:
                # linger=0 so terminating the context cannot block on
                # undelivered messages.
                socket.close(linger=0)
                context.term()
示例#4
0
def main(args):
  """Play ``args.games`` games between the configured agents and log rewards.

  Each entry of ``args.agents`` becomes one player: ``'human'``, ``'greedy'``
  and ``'rules'`` create an ``Agent`` without a model, ``'base'`` loads
  ``base.zip`` and any other value loads ``<agent>.zip``. When
  ``args.env_name`` is ``"blobwar"``, human moves come from
  ``Human().compute_next_move(env.core)`` and not from the text prompt.

  Args:
    args: namespace providing ``debug``, ``env_name``, ``verbose``,
      ``manual``, ``seed``, ``recommend``, ``agents``, ``games``,
      ``randomise_players``, ``best``, ``cont`` and ``write_results``.

  Raises:
    Exception: if ``len(args.agents)`` differs from ``env.n_players``.
  """
  logger.configure(config.LOGDIR)

  if args.debug:
    logger.set_level(config.DEBUG)
  else:
    logger.set_level(config.INFO)

  #make environment
  env = get_environment(args.env_name)(verbose = args.verbose, manual = args.manual)
  env.seed(args.seed)

  total_rewards = {}

  if args.recommend:
    ppo_model = load_model(env, 'best_model.zip')
    ppo_agent = Agent('best_model', ppo_model)
  else:
    ppo_agent = None

  agents = []

  #load the agents
  if len(args.agents) != env.n_players:
    raise Exception(f'{len(args.agents)} players specified but this is a {env.n_players} player game!')

  for agent in args.agents:
    if agent == 'human':
      agent_obj = Agent('human')
    elif agent == 'greedy':
      agent_obj = Agent('greedy')
    elif agent == 'rules':
      agent_obj = Agent('rules')
    elif agent == 'base':
      base_model = load_model(env, 'base.zip')
      agent_obj = Agent('base', base_model)
    else:
      ppo_model = load_model(env, f'{agent}.zip')
      agent_obj = Agent(agent, ppo_model)
    agents.append(agent_obj)
    total_rewards[agent_obj.id] = 0

  if args.env_name == "blobwar":
    # Only referenced below under the same env_name check.
    human_blobwar = Human()

  try:
    #play games
    logger.info(f'\nPlaying {args.games} games...')
    for game in range(args.games):
      players = agents[:]

      if args.randomise_players:
        random.shuffle(players)

      obs = env.reset()
      done = False

      for i, p in enumerate(players):
        logger.debug(f'Player {i+1} = {p.name}')

      while not done:

        current_player = players[env.current_player_num]
        env.render()
        logger.debug(f'Current player name: {current_player.name}')

        if args.recommend and current_player.name in ['human', 'rules']:
          # show recommendation from last loaded model; the value assigned
          # here is replaced by the player's own action below.
          logger.debug(f'\nRecommendation by {ppo_agent.name}:')
          action = ppo_agent.choose_action(env, choose_best_action = True, mask_invalid_actions = True)

        if current_player.name == 'human':
          if args.env_name == "blobwar":
            move = human_blobwar.compute_next_move(env.core)
            action = env.encode_action(move)
          else:
            action = input('\nPlease choose an action: ')

          try:
            # for int actions
            action = int(action)
          except (TypeError, ValueError):
            # for MultiDiscrete action input as list.
            # SECURITY: eval() executes whatever was typed at the prompt;
            # kept for backward compatibility. ast.literal_eval would be the
            # safe choice if only literals are expected. eval() only accepts
            # strings, so a non-string action that int() rejects raises
            # TypeError here.
            action = eval(action)
        elif current_player.name == 'rules':
          logger.debug(f'\n{current_player.name} model choices')
          action = current_player.choose_action(env, choose_best_action = False, mask_invalid_actions = True)
        else:
          logger.debug(f'\n{current_player.name} model choices')
          action = current_player.choose_action(env, choose_best_action = args.best, mask_invalid_actions = True)

        obs, reward, done, _ = env.step(action)

        # reward is paired positionally with this game's seat order.
        for r, player in zip(reward, players):
          total_rewards[player.id] += r
          player.points += r

        if args.cont:
          input('Press any key to continue')

      env.render()

      logger.info(f"Played {game + 1} games: {total_rewards}")

      if args.write_results:
        write_results(players, game, args.games, env.turns_taken)

      # Per-game points are reset; total_rewards keeps accumulating.
      for p in players:
        p.points = 0
  finally:
    # Close the environment even when a game aborts with an exception.
    env.close()