Example #1
def run_one_episode(arguments):
    neural_network, environment, num_episodes, instance_id \
        = arguments

    # Create the agent list (four random agents; the neural network is not used here)
    agent_list = [
        sa.RandomAgent(),
        sa.RandomAgent(),
        sa.RandomAgent(),
        sa.RandomAgent()
    ]

    env = pommerman.make(environment, agent_list)

    # Run the episodes just like OpenAI Gym
    for i_episode in range(int(num_episodes)):
        state = env.reset()
        done = False
        while not done:
            # env.render()
            actions = env.act(state)
            state, reward, done, info = env.step(actions)

        print('Episode {} finished'.format(i_episode))
    env.close()

    return
Example #2
def agent_vs_random(eval_agent, player, variant="TicTacToe"):
    """ Executes one game between eval_agent and a random player.

    Args:
        eval_agent: The AlphaZeroAgent instance.
        player: If player=-1, the agent plays as player two. Otherwise,
            the agent begins the game as player one.
        variant: The game variant. Either "TicTacToe" or "Connect4".
    """
    if variant == "TicTacToe":
        game_environment = game.TicTacToeOptimized()
        max_length = 9
    elif variant == "Connect4":
        game_environment = game.Connect4Optimized()
        max_length = 42

    player_one = eval_agent
    player_two = agent.RandomAgent(eval_agent)

    if player == -1:
        player_two = eval_agent
        player_one = agent.RandomAgent(eval_agent)

    # reset game
    game_environment.reset_game()

    player_one.join_game(game_environment)
    player_two.join_game(game_environment)

    current_player = game_environment.current_player

    winning = 0
    turn = 0

    num_simulations = config.EVALUATION['num_simulations']

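    # Alternate moves until a player wins (winning != 0) or the move limit for the variant is reached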
    while winning == 0 and turn < max_length:
        if current_player == 0:
            winning, _, _ = player_one.play_move(num_simulations, temperature=0)
        elif current_player == 1:
            winning, _, _ = player_two.play_move(num_simulations, temperature=0)

        current_player = game_environment.current_player
        turn += 1

    if current_player == 0:
        winner = -1*winning
    else:
        winner = winning

    return winner
Example #3
def create_agent(agent_type, *args, **kwargs):
    if agent_type.startswith('stationary'):
        pi = None
        if agent_type.endswith(']'):
            # e.g. 'stationary[0.3,0.7]' -> fixed action distribution [0.3, 0.7]
            pi = agent_type[len('stationary['):-1].split(',')
            pi = [float(p) for p in pi]
        return agent.StationaryAgent(*args, **kwargs, pi=pi)
    elif agent_type == 'random':
        return agent.RandomAgent(*args, **kwargs)
    elif agent_type == 'q':
        return agent.QAgent(*args, **kwargs)
    elif agent_type == 'phc':
        return agent.PHCAgent(*args, **kwargs)
    elif agent_type == 'wolf':
        return agent.WoLFAgent(*args, **kwargs)
    elif agent_type == 'minimaxq':
        return agent.MinimaxQAgent(*args, **kwargs)
    elif agent_type == 'metacontrol':
        return agent.MetaControlAgent(*args, **kwargs)
    elif agent_type == 'littmansoccerhandcoded':
        return littmansoccer.HandCodedAgent(*args, **kwargs)
    elif agent_type.endswith('pickle'):
        return load_agent(agent_type)
    else:
        print('no such agent: {}'.format(agent_type))
        return None
Example #4
def getAgent(agentType, playerNum):
    if agentType == 'human':
        return agt.HumanAgent(playerNum)
    elif agentType == 'random':
        return agt.RandomAgent()
    elif agentType == 'simple':
        return agt.SimpleAgent()
    elif agentType == 'reflex':
        return agt.ReflexAgent(playerNum)
    elif agentType == 'simple++':
        return agt.SimpleEnhancedAgent(playerNum)
Example #5
def main():
    # Print all possible environments in the Pommerman registry
    print(pommerman.registry)

    # Create a set of agents (exactly four)
    agent_list = [
        # agents.SimpleAgent(),
        # agents.SimpleAgent(),
        # agents.SimpleAgent(),
        # agents.SimpleAgent(),
        sa.RandomAgent(),
        sa.RandomAgent(),
        sa.RandomAgent(),
        sa.RandomAgent()
        # agents.DockerAgent("pommerman/simple-agent", port=12345),
    ]
    # Make the "Free-For-All" environment using the agent list
    # ['PommeFFACompetition-v0', 'PommeFFACompetitionFast-v0', 'PommeFFAFast-v0', 'PommeFFA-v1', 'PommeRadio-v2', 'PommeTeamCompetition-v0', 'PommeTeam-v0', 'PommeTeamFast-v0']
    # env = pommerman.make('PommeFFACompetitionFast-v0', agent_list)
    env = pommerman.make('PommeFFACompetition-v0', agent_list)
    # env = pommerman.make('PommeTeamCompetition-v0', agent_list)

    # Run the episodes just like OpenAI Gym
    for i_episode in range(1):
        state = env.reset()
        done = False
        while not done:
            # env.render()
            actions = env.act(state)
            print("Actions every step ", actions)
            state, reward, done, info = env.step(actions)
            print("One step ")
            # print("State ", state) # Array of 4 states
            print("Reward ", reward)  # -1, 0 ,1
            # print("Done ", done) # False/True
            # print("Info ", info) # {'result': <Result.Win: 0>, 'winners': [3]}
        print('Episode {} finished'.format(i_episode))

        # for i, al in enumerate(agent_list):
        #     print("Agent Idx ", i, al.get_state_timline())
    env.close()
Example #6
def main():
    #read configure file or parse arguments
    args = parser.ParseArgs()
    cfg = {
        "learning_rate": 0.1,
        "activiation": None,
    }

    #save configure parameter #args:env,multiprocess,hyperparameter
    cpu = args.cpu  # Bool type
    env_id = args.env_id  # name-version

    #create environment and recorder
    env = vector.make(env_id, 1, asynchronous=False, wrappers=None)
    env = wrappers.Monitor(env,
                           directory='/tmp/results',
                           video_callable=None,
                           force=False,
                           resume=False,
                           write_upon_reset=False,
                           uid=None,
                           mode=None)
    env.seed(0)

    #create policy/agent
    agent = ag.RandomAgent(env.action_space)

    #create optimizer and set hyperparameters
    model = model.MultiLayerPerc(cfg, env.ob_space, env.action_space)
    optimizer = op.ces(cfg)

    #set level of log information
    logger.set_level(logger.INFO)

    #initialize
    episode_count = 100
    reward = 0
    cum_reward = 0
    render = False
    done = False

    # rollout
    for i in range(episode_count):
        ob = env.reset()
        while True:
            action = agent.act(ob, reward, done)
            ob, reward, done, _ = env.step(action)
            cum_reward += reward
            if render and i % 10 == 0:
                env.render()
            if done:
                break

    env.close()
Example #7
def run_test_suite(ABPlayer, depth, time, x, y, n):
    seed = 1234
    random.seed(seed)

    num_games = 25
    AlphaBetaVictories = 0
    for i in range(0, num_games):

        #
        # Random vs. AlphaBeta
        #
        goSecond = game.Game(
            x,  # width
            y,  # height
            n,  # tokens in a row to win
            agent.RandomAgent("random"),  # player 1
            aba.AlphaBetaAgent("alphabeta", depth))  # player 2

        # AlphaBeta vs Random
        #
        goFirst = game.Game(
            x,  # width
            y,  # height
            n,  # tokens in a row to win
            aba.AlphaBetaAgent("alphabeta", depth),  # player 1
            agent.RandomAgent("random"))  # player 2

        if ABPlayer == 1:
            outcome = goFirst.timed_go(time)
            if outcome == 1:
                AlphaBetaVictories += 1
        else:
            outcome = goSecond.timed_go(time)
            if outcome == 2:
                AlphaBetaVictories += 1
        seed += random.randint(1, 100)
        #print("RANDOM SEED: " + str(seed))
        random.seed(seed)
        print("Game " + str(i) + " complete")
    print("AlphaBeta won " + str(AlphaBetaVictories) + " out of " + str(max))
Example #8
def create_player_list(args):
    # Only need board_params and players in args
    board_params = args["board_params"]

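    # Instantiate one agent per entry in args["players"], dispatching on the "agent" field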
    list_players = []
    for i, player_args in enumerate(args["players"]):
        kwargs = removekey(player_args, "agent")
        if player_args["agent"] == "RandomAgent":
            list_players.append(agent.RandomAgent(f"Random_{i}"))
        elif player_args["agent"] == "PeacefulAgent":
            list_players.append(agent.PeacefulAgent(f"Peaceful_{i}"))
        elif player_args["agent"] == "FlatMCPlayer":
            list_players.append(
                agent.FlatMCPlayer(name=f'flatMC_{i}', **kwargs))
        elif player_args["agent"] == "UCTPlayer":
            list_players.append(agent.UCTPlayer(name=f'UCT_{i}', **kwargs))
        elif player_args["agent"] == "PUCTPlayer":
            world = World(board_params["path_board"])
            board = Board(
                world,
                [agent.RandomAgent('Random1'),
                 agent.RandomAgent('Random2')])
            board.setPreferences(board_params)
            puct = load_puct(board, player_args)
            list_players.append(puct)
        elif player_args["agent"] == "NetPlayer":
            world = World(board_params["path_board"])
            board = Board(
                world,
                [agent.RandomAgent('Random1'),
                 agent.RandomAgent('Random2')])
            board.setPreferences(board_params)
            netPlayer = load_NetPlayer(board, player_args)
            list_players.append(netPlayer)
        elif player_args["agent"] == "Human":
            hp_name = player_args["name"] if "name" in player_args else "human"
            hp = agent.HumanAgent(name=hp_name)
            list_players.append(hp)

    return list_players
Example #9
 def __battle_random_agent(self):
     executor = concurrent.futures.ProcessPoolExecutor()
     for index in range(0, self.__NUMBER_INDIVIDUALS):
         waiting_queue = []
         for times in range(0, self.__NUMBER_BATTLES):
             first = self.__now_generation[index][0].copy()
             second = agent.RandomAgent()
             game = game_board.GameBoard(first, second)
             waiting_queue.append(executor.submit(game.game_start))
         for end_task in concurrent.futures.as_completed(waiting_queue):
             self.__progress_bar.update(1)
             if end_task.result() == -1:
                 self.__now_generation[index][1] += 2
             elif end_task.result() == 2:
                 self.__now_generation[index][1] += 1
     executor.shutdown()
Example #10
def test_cost(layout, circle, dices=None):
    C_th, _ = value_iteration.markovDecision(layout, circle, actions=dices)

    if dices is None:
        pi = agent.OptimalAgent(layout, circle)
    else:
        pi = agent.RandomAgent(dices)

    C_sim = simulation.estimate_cost(layout, circle, pi, n_episodes=int(1e3))

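    # The analytic cost from value iteration and the simulated estimate should agree within the tolerance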
    passed = np.allclose(C_th, C_sim, atol=0.1)
    if not passed:
        print("Not the same expected cost:")
        print("Th: ", *["{:.2f}".format(c) for c in C_th])
        print("Sim:", *["{:.2f}".format(c) for c in C_sim])
    else:
        print("OK")
    return passed
Example #11
def playGames(agent_b, agent_w=agent.RandomAgent(), max_epochs=100):
    w_win = 0
    b_win = 0
    scores = []
    for i_episode in range(max_epochs):
        observation = env.reset()
        while True:
            ################### Black (B) ############################### 0 means black
            # Black's move
            action = [65, 0]
            enables = env.possible_actions
            if len(enables) == 0:
                action[0] = env.board_size**2 + 1  # pass
            else:
                action[0] = agent_b.place(observation, enables, 0)  # 0 means black
            observation, reward, done, info = env.step(action)
            ################### White (W) ############################### 1 means white
            # White's move
            action = [65, 1]
            enables = env.possible_actions
            # if there is nothing to do, pass
            if len(enables) == 0:
                action[0] = env.board_size ** 2 + 1  # pass
            else:
                action[0] = agent_w.place(observation, enables, 1)
            observation, reward, done, info = env.step(action)
            ################## GAME OVER ###########################
            if done:  # game over
                # env.render()
                # Note: the original scoring is flawed, since the board may not be completely filled
                black_score = len(np.where(env.state[0, :, :] == 1)[0])
                white_score = len(np.where(env.state[1, :, :] == 1)[0])
                if black_score > white_score:
                    b_win += 1
                else:
                    w_win += 1
                scores.append((black_score, white_score))
                break
Example #12
def test_experiment_shoebox():
    """
    Testing a run with ShoeBox room

    TODO
    """
    # Shoebox Room
    room = room_types.ShoeBox(x_length=10, y_length=10)

    agent_loc = np.array([3, 8])

    # Set up the gym environment
    env = gym.make(
        "audio-room-v0",
        room_config=room.generate(),
        agent_loc=agent_loc,
        corners=room.corners,
        max_order=10,
        step_size=1.0,
        acceptable_radius=0.8,
    )

    # create buffer data folders
    utils.create_buffer_data_folders()

    tfm = nussl.datasets.transforms.Compose([
        nussl.datasets.transforms.GetAudio(mix_key='new_state'),
        nussl.datasets.transforms.ToSeparationModel(),
        nussl.datasets.transforms.GetExcerpt(excerpt_length=32000, tf_keys=['mix_audio'], time_dim=1),
    ])

    # create dataset object (subclass of nussl.datasets.BaseDataset)
    dataset = BufferData(folder=constants.DIR_DATASET_ITEMS, to_disk=True, transform=tfm)

    # Load the agent class
    a = agent.RandomAgent(env=env, dataset=dataset, episodes=2, max_steps=10, plot_reward_vs_steps=False)
    a.fit()
Example #13
def main(args=None):
    from optparse import OptionParser
    usage = "usage: %prog [options]"
    parser = OptionParser(usage=usage)

    parser.add_option("-p",
                      "--player1",
                      dest="player1",
                      default="random",
                      help="Choose type of first player")

    (opts, args) = parser.parse_args(args)

    evalArgs = load_weights()
    evalFn = aiAgents.nnetEval

    p1 = None
    if opts.player1 == 'random':
        p1 = agent.RandomAgent(game.Game.TOKENS[0])
    elif opts.player1 == 'reflex':
        p1 = aiAgents.TDAgent(game.Game.TOKENS[0], evalArgs)
    elif opts.player1 == 'expectiminimax':
        p1 = aiAgents.ExpectiMiniMaxAgent(game.Game.TOKENS[0], evalFn,
                                          evalArgs)
    elif opts.player1 == 'human':
        p1 = agent.HumanAgent(game.Game.TOKENS[0])


#    p2 = agent.RandomAgent(game.Game.TOKENS[1])
    p2 = aiAgents.ExpectiMiniMaxAgent(game.Game.TOKENS[1], evalFn, evalArgs)
    if p1 is None:
        print "Please specify legitimate player"
        import sys
        sys.exit(1)

    play([p1, p2])
Example #14
def main(args):
    if not os.path.isdir(args.log_dir):
        os.makedirs(args.log_dir)

    if args.save_dir and not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)

    if args.seed:
        random.seed(args.seed)
        np.random.seed(args.seed)

    basename = envs.BASENAMES[args.environment]

    env_name = '{basename}-{scenario}-{dataset}-v0'.format(
        basename=basename, scenario=args.scenario, dataset=args.dataset)
    env = gym.make(env_name)

    print("Environment {}".format(args.environment))
    print(env.observation_space)
    print(env.action_space)

    if args.features == 'net':
        extractor = feature_extractor.NetFeatureExtractor()
    elif args.features == 'hash':
        extractor = feature_extractor.ImageHashExtractor()
    else:
        raise NotImplementedError(
            "Unknown feature extraction method '{}'".format(args.features))

    if args.agent == "baseline":
        # Special case -> handled somewhere else
        baseline(env, args)
        return
    elif args.agent == "bandit":
        actor = agent.BanditAgent(env, extractor)  # VW
    elif args.agent == "random":
        actor = agent.RandomAgent(env)

    obs = env.reset()
    print(obs)

    original_score = 0.0
    modified_score = 0.0
    totalreward = 0.0
    totalsuccess = 0.0
    iter_duration = 0.0
    statistics = {}

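    # One statistics entry per action (and per hierarchical sub-action): count, cumulative reward, success count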
    for idx in range(env.action_space.n):
        statistics[env.actions[idx][0]] = {
            'action': env.actions[idx][0],
            'count': 0,
            'reward': 0.0,
            'success': 0
        }

        if env.is_hierarchical_action(idx):
            params = env.actions[idx][1]
            for param_idx in range(env.hierarchical_actions[idx]['space'].n):
                statistics[params[param_idx][0]] = {
                    'action': params[param_idx][0],
                    'count': 0,
                    'reward': 0.0,
                    'success': 0
                }

    log_file = '{timestamp}-{env}-{sc}-{agent}.json'.format(
        timestamp=datetime.now().strftime("%Y%m%d%H%M%S"),
        env=args.environment,
        sc=args.scenario,
        agent=args.agent)
    log_file = os.path.join(args.log_dir, log_file)

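    # Main interaction loop: act, step the environment, update the agent, and accumulate statistics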
    for iteration in range(1, args.iterations + 1):
        start = time.time()
        act = actor.act(obs)
        obs, reward, done, info = env.step(act)

        actor.update(reward, done=done)
        iter_duration += time.time() - start

        action_name, param_name = env.get_action_name(act[0], act[1])

        statistics[action_name]['count'] += 1
        statistics[action_name]['reward'] += reward[0]
        statistics[action_name]['success'] += reward[0] > 0

        if param_name:
            statistics[param_name]['count'] += 1
            statistics[param_name]['reward'] += reward[1]
            statistics[param_name]['success'] += reward[1] > 0

        original_score += info['original_score']
        modified_score += info['modified_score']
        totalreward += reward[0]
        totalsuccess += reward[0] > 0

        if done:
            obs = env.reset()

        if (iteration % args.log_interval
                == 0) or iteration == args.iterations:
            stat_string = ' | '.join([
                "{:.2f} ({:.2f}/{:d})".format(
                    v['success'] / (v['count'] + 1e-10), v['success'],
                    v['count']) for v in statistics.values()
            ])
            print("i = {}".format(iteration), round(totalsuccess / iteration,
                                                    2),
                  round(original_score / iteration, 2),
                  round(modified_score / iteration, 2), '\t', stat_string)

            log_dict = {
                'env': args.environment,
                'scenario': args.scenario,
                'agent': args.agent,
                'iteration': iteration,
                'totalreward': totalreward,
                'success': totalsuccess,
                'statistics': statistics,
                'original_accuracy': float(original_score) / iteration,
                'modified_accuracy': float(modified_score) / iteration,
                'duration': iter_duration / iteration
            }
            with open(log_file, 'a') as f:
                f.write(json.dumps(log_dict) + os.linesep)
Example #15
    def search(self, state, depth, use_val=False):
        # print("\n\n-------- SEARCH --------")
        # print(f"depth: {depth}")
        # state.report()

        # Is terminal? return vector of score per player
        if isTerminal(state) or depth > self.max_depth:
            # print("\n\n-------- TERMINAL --------")
            return score_players(state), score_players(state)

        # Active player is dead, then end turn
        while not state.activePlayer.is_alive:
            state.endTurn()
            if state.gameOver:
                return score_players(state), score_players(state)

        s = hash(state)
        # Is leaf?
        if s not in self.Ps:
            canon, map_to_orig = state.toCanonical(state.activePlayer.code)
            batch = torch_geometric.data.Batch.from_data_list(
                [boardToData(canon)])
            mask, moves = maskAndMoves(canon, canon.gamePhase,
                                       batch.edge_index)

            if self.apprentice is not None:
                policy, value = self.apprentice.play(canon)
            else:
                # No bias, just uniform sampling for the moment
                policy, value = torch.ones_like(mask) / max(
                    mask.shape), torch.zeros((1, 6))

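            # Zero out invalid moves, then cache the mask, move list, and priors for this leaf state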
            policy = policy * mask
            self.Vs[s], self.As[s] = mask.squeeze(), moves
            self.Ps[s] = policy.squeeze()
            self.Ns[s] = 1

            # Return an evaluation
            v = np.zeros(6)
            for _ in range(self.sims_per_eval):
                sim = copy.deepcopy(state)
                sim.simulate(agent.RandomAgent())
                v += score_players(sim)
            v /= self.sims_per_eval

            # Fix order of value returned by net
            value = value.squeeze()
            # Apprentice already does this
            # cor_value = torch.FloatTensor([value[map_to_orig.get(i)] if not map_to_orig.get(i) is None else 0.0  for i in range(6)])
            cor_value = value
            return v, cor_value

        # Not a leaf, keep going down. Use values for the current player
        p = state.activePlayer.code
        action = -1
        bestScore = -float('inf')
        # print("Valid:")
        # print(self.Vs[s])
        for i, act in enumerate(self.As[s]):
            a = hash(act)
            # print(i, act)
            if self.Vs[s][i] > 0.0:
                if (s, a) in self.Rsa:
                    # PUCT formula
                    uct = self.Rsa[(s, a)][p] + self.cb * np.sqrt(
                        np.log(self.Ns[s]) / max(self.Nsa[(s, a)], self.eps))
                    val = self.wb * self.Qsa[(s, a)] * (use_val)
                    pol = self.wa * self.Ps[s][i] / (self.Nsa[(s, a)] + 1)
                    sc = uct + pol + val[p]
                else:
                    # Unseen action, take it
                    action = act
                    break
                if sc > bestScore:
                    bestScore = sc
                    action = act

        if isinstance(action, int) and action == -1:
            print("**** No move?? *****")
            state.report()
            print(self.As[s])
            print(self.Vs[s])

        # print('best: ', action)
        a = hash(action)  # Best action in simplified way
        move = buildMove(state, action)
        # Play action, continue search
        # TODO: For now, armies are placed on one country only to simplify the game
        # print(move)
        state.playMove(move)
        v, net_v = self.search(state, depth + 1, use_val)
        if isinstance(net_v, torch.Tensor):
            net_v = net_v.detach().numpy()
        if isinstance(v, torch.Tensor):
            v = v.detach().numpy()

        if (s, a) in self.Rsa:
            rsa, qsa, nsa = self.Rsa[(s, a)], self.Qsa[(s, a)], self.Nsa[(s,
                                                                          a)]
            self.Rsa[(s, a)] = (nsa * rsa + v) / (nsa + 1)
            self.Qsa[(s, a)] = (nsa * qsa + net_v) / (nsa + 1)
            self.Nsa[(s, a)] += 1

        else:
            self.Rsa[(s, a)] = v
            self.Qsa[(s, a)] = net_v
            self.Nsa[(s, a)] = 1

        self.Ns[s] += 1

        return v, net_v
Example #16
    # Calculate and print scores
    sscores = sorted(((v, k.name) for k, v in scores.items()), reverse=True)
    print("\nSCORES:")
    for v, k in sscores:
        print(v, k)


#######################
# Run the tournament! #
#######################

# Set random seed for reproducibility
random.seed(1)

# Construct list of agents in the tournament
agents = [
    # aba.AlphaBetaAgent("aba", 4),
    agent.RandomAgent("random1"),
    agent.RandomAgent("random2"),
    agent.RandomAgent("random3"),
    agent.RandomAgent("random4")
]

# Run!
play_tournament(
    7,  # board width
    6,  # board height
    4,  # tokens in a row to win
    15,  # time limit in seconds
    agents)  # player list
Example #17
    # GET THE AGENT
    ###########################

    import agent
    a = None

    if opts.agent == 'value':
        a = agent.ValueIterationAgent(mdp, opts.discount, opts.iters)
    elif opts.agent == 'q':
        a = agent.QLearningAgent(env.getPossibleActions, opts.discount,
                                 opts.learningRate, opts.epsilon)
    elif opts.agent == 'random':
        # No reason to use the random agent without episodes
        if opts.episodes == 0:
            opts.episodes = 1
        a = agent.RandomAgent(mdp.getPossibleActions)
    else:
        raise ValueError('Unknown agent type: ' + opts.agent)

    ###########################
    # RUN EPISODES
    ###########################
    print(opts.agent)
    # DISPLAY Q/V VALUES BEFORE SIMULATION OF EPISODES
    if opts.agent == 'value':
        display.displayValues(a,
                              message="VALUES AFTER " + str(opts.iters) +
                              " ITERATIONS")
        display.pause()
        display.displayQValues(a,
                               message="Q-VALUES AFTER " + str(opts.iters) +
Example #18
def main(args=None):
    import sys
    print("Please choose the type of agent  human or TDagent or random")
    line = sys.stdin.readline()

    from optparse import OptionParser
    usage = "usage: %prog [options]"
    parser = OptionParser(usage=usage)

    parser.add_option("-d",
                      "--draw",
                      dest="draw",
                      action="store_true",
                      default=False,
                      help="Draw game")
    parser.add_option("-n",
                      "--num",
                      dest="numgames",
                      default=1,
                      help="Num games to play")
    parser.add_option("-p",
                      "--player1",
                      dest="player1",
                      default=str(line.strip()),
                      help="Choose type of first player")
    parser.add_option("-e",
                      "--eval",
                      dest="eval",
                      action="store_true",
                      default=True,
                      help="Play with the better eval function for player")

    (opts, args) = parser.parse_args(args)

    weights = None
    weights1 = None

    if opts.eval:
        weights, weights1 = load_weights(weights, weights1)

        evalArgs = weights
        evalArgs1 = weights1
        evalFn = aiAgents.nnetEval
    print("The choosen agent is: " + str(opts.player1))
    p1 = None

    if str(opts.player1) == 'random':
        p1 = agent.RandomAgent(game.Game.TOKENS[0])
        #print p1

    elif opts.player1 == 'TDagent':
        p1 = aiAgents.TDAgent(game.Game.TOKENS[0], evalArgs1)
    elif opts.player1 == 'expectimax':
        p1 = aiAgents.ExpectimaxAgent(game.Game.TOKENS[0], evalFn, evalArgs)
    elif opts.player1 == 'expectiminimax':
        p1 = aiAgents.ExpectiMiniMaxAgent(game.Game.TOKENS[0], evalFn,
                                          evalArgs)
    elif opts.player1 == 'human':
        p1 = agent.HumanAgent(game.Game.TOKENS[0])

    p2 = aiAgents.TDAgent(game.Game.TOKENS[1], evalArgs)
    #    p2 = aiAgents.ExpectiMiniMaxAgent(game.Game.TOKENS[1],evalFn,evalArgs)

    if opts.player1 == 'random':
        test([p1, p2], numGames=int(opts.numgames), draw=opts.draw)
        print("o is random")
        print("x is the agent")
    if opts.player1 == 'TDagent':
        #test([p1,p2],numGames=int(opts.numgames),draw=opts.draw)

        play([p1, p2])
    if opts.player1 == 'human':
        play([p1, p2])
        print("o is td(0)")
        print("x is the agent td(0.5)")
    if p1 is None:
        print "Please specify legitimate player"
        import sys
        sys.exit(1)
Example #19
expert_mcts_sims = inputs["expert_mcts_sims"]

path_data = inputs["path_data"]
path_model = inputs["path_model"]
batch_size = inputs["batch_size"]
model_args = read_json(inputs["model_parameters"])

path_board = inputs["path_board"]

# ---------------- Model -------------------------

#%%% Create Board
world = World(path_board)

# Set players
pR1, pR2, pR3 = agent.RandomAgent('Red'), agent.RandomAgent(
    'Blue'), agent.RandomAgent('Green')
players = [pR1, pR2, pR3]
# Set board
# TODO: Send to inputs
prefs = {
    'initialPhase': True,
    'useCards': True,
    'transferCards': True,
    'immediateCash': True,
    'continentIncrease': 0.05,
    'pickInitialCountries': True,
    'armiesPerTurnInitial': 4,
    'console_debug': False
}
Example #20
 def testRandomAgent(self):
     rs = np.random.RandomState(seed=1)
     env = gw.Grid2D(np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5]]), r=rs)
     ra = a.RandomAgent(env, start=[1, 1], r=rs)
Example #21
PLOT_FREQUENCY = 500
BATCH_SIZE = 1024  # for faster training take a smaller batch size, not too small as batchnorm will not work otherwise
GAMMA = 0.9  # already favors reaching goal faster, no need for reward_step, the lower GAMMA the faster
EPS_START = 0.9  # for unstable models take higher randomness first
EPS_END = 0.01
EPS_DECAY = 2000
N_SMOOTH = 500  # plotting scores averaged over this number of episodes
VERBOSE = 1  # level of printed output verbosity:
# 1: plot averaged episode stats
# 2: also print actions taken and rewards
# 3: every 100 episodes run_env()
# also helpful sometimes: printing probabilities in "select_action" function of agent

num_episodes = 100000  # training for how many episodes
agent0 = agent.Stratego(0)
agent1 = agent.RandomAgent(1)
# agent1 = agent.Random(1)
# agent1.model = agent0.model  # if want to train by self-play
env__ = teacher.Trainer(agent0, agent1, False, "custom", [0, 1])
env_name = "stratego"

model = env__.agents[0].model  # optimize model of agent0
model = model.to(device)
optimizer = optim.Adam(model.parameters())
memory = utils.ReplayMemory(10000)

# model.load_state_dict(torch.load('./saved_models/{}_current.pkl'.format(env_name)))  # trained against Random
train(env__, num_episodes)
# model.load_state_dict(torch.load('./saved_models/{}.pkl'.format(env_name)))  # trained against Random

run_env(env__, 10000)
Example #22
import random
import game
import agent
import alpha_beta_agent as aba

# Set random seed for reproducibility
random.seed(1)

#
# Random vs. Random
#
g = game.Game(
    7,  # width
    6,  # height
    4,  # tokens in a row to win
    agent.RandomAgent("random1"),  # player 1
    agent.RandomAgent("random2"))  # player 2

#
# Human vs. Random
#
# g = game.Game(7, # width
#               6, # height
#               4, # tokens in a row to win
#               agent.InteractiveAgent("human"),    # player 1
#               agent.RandomAgent("random"))        # player 2

#
# Random vs. AlphaBeta
#
# g = game.Game(7, # width
Example #23
    def __getattr__(self, name):
        return self[name]


args = dotdict({
    'max_epochs': 100,
    'play_turns': 100,
    'checkpoint': './checkpoint/',
    'load_model': False,
    'load_folder': '',
})

if __name__ == "__main__":
    w_win = 0
    b_win = 0
    agent_b = agent.RandomAgent()
    agent_w = agent.GreedyAgent()

    for i_episode in range(args.max_epochs):
        observation = env.reset()
        # observation is a 3 x 8 x 8 list describing the current position; see `state` in reversi.py for the exact definition
        for t in range(args.play_turns):
            action = [65, 0]
            # action holds two integers: action[0] is the board position to play, action[1] is the color (0 for black, 1 for white)

            ################### Black (B) ############################### 0 means black
            # Black's move
            #env.render()  # print the current board
            enables = env.possible_actions
            if len(enables) == 0:
                action[0] = env.board_size**2 + 1
Example #24
    # def _convert_point_coord_to_move(self, pointx: int, pointy: int) -> None:
    #     ''' Converts canvas point to a move that can be inputted in the othello game '''
    #     row = int(pointy // self._board.get_cell_height())
    #     if row == self._board.get_rows():
    #         row -= 1
    #     col = int(pointx // self._board.get_cell_width())
    #     if col == self._board.get_columns():
    #         col -= 1
    #     return (row, col)

    def _on_board_resized(self, event: tkinter.Event) -> None:
        ''' Called whenever the canvas is resized '''
        self._board.redraw_board()


if __name__ == '__main__':
    black_wins = 0
    white_wins = 0
    for i in range(20):
        gui = OthelloGUI(agent.AlphaBetaAgent(), agent.RandomAgent())
        gui.start()
        winner = gui.findWinner()
        if winner == 'B':
            black_wins = black_wins + 1
        if winner == 'W':
            white_wins = white_wins + 1
    print("black wins:")
    print(black_wins)
    print("white wins:")
    print(white_wins)
Example #25
    path_board = board_params["path_board"]

    epochs = inputs["epochs"]
    eval_every = inputs["eval_every"]

    # ---------------- Load model -------------------------

    move_types = [
        'initialPick', 'initialFortify', 'startTurn', 'attack', 'fortify'
    ]

    #%%% Create Board
    world = World(path_board)

    # Set players
    pR1, pR2 = agent.RandomAgent('Red'), agent.RandomAgent('Blue')
    players = [pR1, pR2]
    # Set board
    # TODO: Send to inputs
    prefs = board_params

    board_orig = Board(world, players)
    board_orig.setPreferences(prefs)

    num_nodes = board_orig.world.map_graph.number_of_nodes()
    num_edges = board_orig.world.map_graph.number_of_edges()

    if verbose: print("\t\ttrain_model: Creating model")
    net = GCN_risk(
        num_nodes, num_edges, model_args['board_input_dim'],
        model_args['global_input_dim'], model_args['hidden_global_dim'],
Example #26
#

# g = game.Game(7, # width
#               6, # height
#               4, # tokens in a row to win
#               agent.InteractiveAgent("human"),    # player 1
#               agent.RandomAgent("random"))        # player 2

#
# Random vs. AlphaBeta

g = game.Game(
    10,  # width
    8,  # height
    5,  # tokens in a row to win
    agent.RandomAgent("random"),  # player 1
    aba.AlphaBetaAgent("alphabeta", 4))  # player 2

#
# Human vs. AlphaBeta
#
# g = game.Game(7, # width
#               6, # height
#               4, # tokens in a row to win
#               agent.InteractiveAgent("human"),    # player 1
#               aba.AlphaBetaAgent("alphabeta", 4)) # player 2

#
# Human vs. Human
#
# g = game.Game(7, # width
Example #27
def train(args):
    w_atk = np.random.normal(0, 1e-2, (util.NUM_FEATURES, ))
    w_def = np.random.normal(0, 1e-2, (util.NUM_FEATURES, ))
    w_atk[-1] = 0
    w_def[-1] = 0

    agents = [getAgent(args.agent, 0), getAgent(args.agent, 1)]
    for agent in agents:
        agent.setAttackWeights(w_atk)
        agent.setDefendWeights(w_def)

    g = dk.Durak()
    for i in range(args.numGames):
        attacker = g.getFirstAttacker()
        defender = int(not attacker)
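        # Play one game: attacker and defender alternate, with TD(0) updates between consecutive states for each role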
        while True:
            preAttack = None
            preDefend = None
            while True:
                preAttack = g.getState(attacker)
                attack(g, attacker, agents[attacker])
                postAttack = g.getState(defender)
                if g.roundOver():
                    break
                elif preDefend is not None:
                    w_def = TDUpdate(preDefend, postAttack, 0, w_def)
                    for agent in agents:
                        agent.setDefendWeights(w_def)

                preDefend = postAttack
                defend(g, defender, agents[defender])
                postDefend = g.getState(attacker)
                if g.roundOver():
                    break
                else:
                    w_atk = TDUpdate(preAttack, postDefend, 0, w_atk)
                    for agent in agents:
                        agent.setAttackWeights(w_atk)

            if g.gameOver():
                if g.isWinner(attacker):
                    w_atk = TDUpdate(g.getState(attacker), None, 1, w_atk)
                    w_def = TDUpdate(g.getState(defender), None, 0, w_def)
                else:
                    w_def = TDUpdate(g.getState(defender), None, 1, w_def)
                    w_atk = TDUpdate(g.getState(attacker), None, 0, w_atk)
                for agent in agents:
                    agent.setAttackWeights(w_atk)
                    agent.setDefendWeights(w_def)
                break

            g.endRound()

            # Edge case, the defender from the last round won
            if g.gameOver():
                w_def = TDUpdate(g.getState(defender), None, 1, w_def)
                w_atk = TDUpdate(g.getState(attacker), None, 0, w_atk)
                for agent in agents:
                    agent.setDefendWeights(w_def)
                    agent.setAttackWeights(w_atk)
                break
            else:
                w_def = TDUpdate(preDefend, g.getState(defender), 0, w_def)
                w_atk = TDUpdate(preAttack, g.getState(attacker), 0, w_atk)
                for agent in agents:
                    agent.setDefendWeights(w_def)
                    agent.setAttackWeights(w_atk)

            attacker = g.attacker
            defender = int(not attacker)

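        # Periodically evaluate the learned weights against random and simple baseline agents and log the results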
        if i % 50 == 0:
            print(('Training iteration: %d / %d' % (i, args.numGames)))
            randomAgent = agt.RandomAgent()
            simpleAgent = agt.SimpleAgent()
            winCounts = {'random': 0, 'simple': 0}
            for _ in range(500):
                winVsRandom = play(dk.Durak(), [randomAgent, agents[0]])
                winVsSimple = play(dk.Durak(), [simpleAgent, agents[0]])
                winCounts['random'] += winVsRandom
                winCounts['simple'] += winVsSimple
            with open('results.csv', 'a') as f:
                row = [i, winCounts['random'], winCounts['simple']]
                row.extend(w_atk)
                row.extend(w_def)
                np.savetxt(f,
                           np.array(row)[:, None].T,
                           delimiter=',',
                           fmt='%.4e')

            # save weights
            with open('%s_attack_%d.bin' % (args.agent, i), 'wb') as f_atk:
                pickle.dump(w_atk, f_atk)
            with open('%s_defend_%d.bin' % (args.agent, i), 'wb') as f_def:
                pickle.dump(w_def, f_def)

        g.newGame()

    with open('%s_attack.bin' % args.agent, 'wb') as f_atk:
        pickle.dump(w_atk, f_atk)
    with open('%s_defend.bin' % args.agent, 'wb') as f_def:
        pickle.dump(w_def, f_def)

    return w_atk, w_def
Example #28

#######################
# Run the tournament! #
#######################

# Set random seed for reproducibility
random.seed(1)

# GAME CONFIGURATION
depth = 4
tokens_to_win = 4
time_limit = 15

for i in range(1):
    random.seed(i)
    # Run!
    play_tournament(
        7,  # board width
        6,  # board height
        tokens_to_win,  # tokens in a row to win
        time_limit,  # time limit in seconds
        [
            aba.AlphaBetaAgent("New AI", depth, tokens_to_win),
            oaba.OldAlphaBetaAgent("Old AI", depth, tokens_to_win),
            agent.RandomAgent("random1"),
            # agent.RandomAgent("random2"),
            # agent.RandomAgent("random3"),
            # agent.RandomAgent("random4"),
        ])  # player list
Example #29
File: env.py  Project: Drkchy/RL
if __name__ == "__main__":
    
    import argparse
    parser = argparse.ArgumentParser(description='2048 Game w/ AI')
    parser.add_argument('-a', '--agent', type=str, help='name of agent (Random or Expectimax)')
    parser.add_argument('-d', '--depth', type=int, default=2, help='depth')
    parser.add_argument('-g', '--goal', type=int, default=4086, help='goal tile that ends the game')
    parser.add_argument('--no-graphics', action='store_true', help='no graphics (only works when AI specified)')
    args = parser.parse_args()

    Agent = None
    graphics = True
    

    if args.agent == 'RandomAgent':
        Agent = agent.RandomAgent()
    elif args.agent == 'Depth_limited_Expectimax_Agent':
        Agent = agent.Depth_limited_Expectimax_Agent(depth=args.depth)       
    elif args.agent == 'Customized_Expectimax_Agent':
        Agent = agent.Customized_Expectimax_Agent(depth=args.depth)

# =============================================================================
#     DEMO 
# =============================================================================
    #Agent = agent.RandomAgent()
    #Agent = agent.Basic_Expectimax_Agent()
    Agent = agent.Depth_limited_Expectimax_Agent(depth=2) 
    #Agent = agent.Customized_Expectimax_Agent(depth = 2)
# =============================================================================

    
Example #30
def main(args=None):
    from optparse import OptionParser
    usage = "usage: %prog [options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-t",
                      "--train",
                      dest="train",
                      action="store_true",
                      default=False,
                      help="Train TD Player")
    parser.add_option("-d",
                      "--draw",
                      dest="draw",
                      action="store_true",
                      default=False,
                      help="Draw game")
    parser.add_option("-n",
                      "--num",
                      dest="numgames",
                      default=1,
                      help="Num games to play")
    parser.add_option("-p",
                      "--player1",
                      dest="player1",
                      default="random",
                      help="Choose type of first player")
    parser.add_option("-e",
                      "--eval",
                      dest="eval",
                      action="store_true",
                      default=False,
                      help="Play with the better eval function for player")

    (opts, args) = parser.parse_args(args)

    weights = None
    if opts.train:
        weights = train()

    if opts.eval:
        weights = load_weights(weights)
        evalFn = submission.logLinearEvaluation
        evalArgs = weights
    else:
        evalFn = submission.simpleEvaluation
        evalArgs = None

    p1 = None
    if opts.player1 == 'random':
        p1 = agent.RandomAgent(game.Game.TOKENS[0])
    elif opts.player1 == 'reflex':
        p1 = submission.ReflexAgent(game.Game.TOKENS[0], evalFn, evalArgs)
    elif opts.player1 == 'expectimax':
        p1 = submission.ExpectimaxAgent(game.Game.TOKENS[0], evalFn, evalArgs)
    elif opts.player1 == 'human':
        p1 = agent.HumanAgent(game.Game.TOKENS[0])

    p2 = agent.RandomAgent(game.Game.TOKENS[1])

    if p1 is None:
        print "Please specify legitimate player"
        import sys
        sys.exit(1)

    test([p1, p2], numGames=int(opts.numgames), draw=opts.draw)