Example #1
def __init__(self):
    ''' Load pretrained model
    '''
    env = rlcard.make('leduc-holdem')
    self.agent = CFRAgent(env,
                          model_path=os.path.join(ROOT_PATH,
                                                  'leduc_holdem_cfr'))
    self.agent.load()
Example #2
def train(args):
    # Make environments; CFR only supports Leduc Holdem
    env = rlcard.make('leduc-holdem', config={'seed': 0, 'allow_step_back':True})
    eval_env = rlcard.make('leduc-holdem', config={'seed': 0})

    # Seed numpy, torch, random
    set_seed(args.seed)

    # Initialize CFR Agent
    agent = CFRAgent(env, os.path.join(args.log_dir, 'cfr_model'))
    agent.load()  # If we have a saved model, load it first

    # Evaluate CFR against random
    eval_env.set_agents([agent, RandomAgent(num_actions=env.num_actions)])

    # Start training
    with Logger(args.log_dir) as logger:
        for episode in range(args.num_episodes):
            agent.train()
            print('\rIteration {}'.format(episode), end='')
            # Evaluate the performance. Play with Random agents.
            if episode % args.evaluate_every == 0:
                agent.save() # Save model
                logger.log_performance(env.timestep, tournament(eval_env, args.num_eval_games)[0])

        # Get the paths
        csv_path, fig_path = logger.csv_path, logger.fig_path
    # Plot the learning curve
    plot_curve(csv_path, fig_path, 'cfr')
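A minimal driver for this function might look as follows. The argument names mirror the fields that train reads from args (seed, log_dir, num_episodes, evaluate_every, num_eval_games); the default values are illustrative assumptions, not values from the example.

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser("CFR example in RLCard")
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--num_episodes', type=int, default=5000)
    parser.add_argument('--num_eval_games', type=int, default=2000)
    parser.add_argument('--evaluate_every', type=int, default=100)
    parser.add_argument('--log_dir', type=str, default='experiments/leduc_holdem_cfr_result/')

    args = parser.parse_args()
    train(args)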
Example #3
def load_model(model_path, env=None, position=None, device=None):
    if os.path.isfile(model_path):  # Torch model
        import torch
        agent = torch.load(model_path, map_location=device)
        agent.set_device(device)
    elif os.path.isdir(model_path):  # CFR model
        from rlcard.agents import CFRAgent
        agent = CFRAgent(env, model_path)
        agent.load()
    elif model_path == 'random':  # Random model
        from rlcard.agents import RandomAgent
        agent = RandomAgent(num_actions=env.num_actions)
    else:  # A model in the model zoo
        from rlcard import models
        agent = models.load(model_path).agents[position]

    return agent
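A hedged usage sketch: each branch of load_model is selected by the kind of model_path passed in. The paths below are hypothetical placeholders; the model-zoo name 'leduc-holdem-nfsp' is the one used elsewhere on this page.

env = rlcard.make('leduc-holdem')

# A random agent only needs the environment for num_actions
random_agent = load_model('random', env=env)

# A directory is treated as a saved CFR policy (hypothetical path)
cfr_agent = load_model('experiments/leduc_holdem_cfr_result/cfr_model', env=env)

# Anything that is neither a file nor a directory is looked up in the model zoo
nfsp_agent = load_model('leduc-holdem-nfsp', position=0)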
Example #4
class LeducHoldemCFRModel(Model):
    ''' A pretrained model on Leduc Holdem with CFR (chance sampling)
    '''

    def __init__(self):
        ''' Load pretrained model
        '''
        env = rlcard.make('leduc-holdem')
        self.agent = CFRAgent(env, model_path=os.path.join(ROOT_PATH, 'leduc_holdem_cfr'))
        self.agent.load()

    @property
    def agents(self):
        ''' Get a list of agents, one for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should behave like an RL agent, with working
              step and eval_step methods.
        '''
        return [self.agent, self.agent]
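A minimal usage sketch for this wrapper. Instantiating the class directly works as shown; the model-zoo name 'leduc-holdem-cfr' is an assumption about how the class is registered.

model = LeducHoldemCFRModel()          # or, assuming registration: models.load('leduc-holdem-cfr')
eval_env = rlcard.make('leduc-holdem')
eval_env.set_agents(model.agents)      # the same CFR agent plays both positions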
Example #5
def train_leduc():
    # Make environments; CFR needs to traverse the game tree, so enable allow_step_back
    env = rlcard.make('leduc-holdem', config={'seed': 0, 'allow_step_back':True})
    eval_env = rlcard.make('leduc-holdem', config={'seed': 0})

    # Set the number of iterations and how often we evaluate performance and save the model
    evaluate_every = 100
    save_plot_every = 1000
    evaluate_num = 10000
    episode_num = 10000

    # The paths for saving the logs and learning curves
    log_dir = './experiments/leduc_holdem_cfr_result/'

    # Set a global seed
    set_global_seed(0)

    model_path = 'models/leduc_holdem_cfr'
    # Initialize CFR Agent
    agent = CFRAgent(env, model_path=model_path)
    agent.load()  # If we have a saved model, load it first

    # Evaluate CFR against pre-trained NFSP
    eval_env.set_agents([agent, models.load('leduc-holdem-nfsp').agents[0]])

    # Init a Logger to plot the learning curve
    logger = Logger(log_dir)

    for episode in range(episode_num):
        agent.train()
        print('\rIteration {}'.format(episode), end='')
        # Evaluate the performance. Play with NFSP agents.
        if episode % evaluate_every == 0:
            agent.save() # Save model
            logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])

    # Close files in the logger
    logger.close_files()

    # Plot the learning curve
    logger.plot('CFR')
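The imports this legacy script assumes might look as follows; exact module paths varied across early RLCard releases, so treat these as an assumption rather than a verified header.

import rlcard
from rlcard import models
from rlcard.agents import CFRAgent
from rlcard.utils import set_global_seed, tournament, Logger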
Example #6
env = rlcard.make('leduc-holdem', config={'seed': 0, 'allow_step_back': True})
eval_env = rlcard.make('leduc-holdem', config={'seed': 0})

# Set the number of iterations and how often we evaluate performance and save the model
evaluate_every = 100
save_plot_every = 1000
evaluate_num = 10000
episode_num = 10000

# The paths for saving the logs and learning curves
log_dir = './experiments/leduc_holdem_cfr_result/'

# Set a global seed
set_global_seed(0)

# Initialize CFR Agent
agent = CFRAgent(env)
agent.load()  # If we have a saved model, load it first

# Evaluate CFR against pre-trained NFSP
eval_env.set_agents([agent, models.load('leduc-holdem-nfsp').agents[0]])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    agent.train()
    print('\rIteration {}'.format(episode), end='')
    # Evaluate the performance. Play with NFSP agents.
    if episode % evaluate_every == 0:
        agent.save()  # Save model
        logger.log_performance(env.timestep,
                               tournament(eval_env, evaluate_num)[0])
Example #7
# The initial memory size
memory_init_size = 1000

# Train the agent every X steps
train_every = 1

# The paths for saving the logs and learning curves
log_dir = './experiments/cfr_result/'

# Set a global seed
set_global_seed(0)

# Set up the agents
agents = []
for i in range(env.player_num):
    agent = CFRAgent(env=env)
    agents.append(agent)
random_agent = RandomAgent(action_num=eval_env.action_num)

env.set_agents(agents)
eval_env.set_agents([agents[0], random_agent])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    for agent in agents:
        agent.train()

    print('\rIteration {}'.format(episode), end='')
    # Evaluate the performance. Play with random agents.
    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep,
                               tournament(eval_env, evaluate_num)[0])
Example #8
episode_num = 100000

# The initial memory size
memory_init_size = 1000

# Train the agent every X steps
train_every = 1

# The paths for saving the logs and learning curves
log_dir = './experiments/cfr_random_result/'

# Set a global seed
set_global_seed(0)

# Set up the agents
agent = CFRAgent(env=env, model_path='./cfr_random_model')
random_agent = RandomAgent(action_num=eval_env.action_num)

env.set_agents([agent, random_agent])
eval_env.set_agents([agent, random_agent])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    agent.train()

    print('\rIteration {}'.format(episode), end='')
    # Evaluate the performance. Play with random agents.
    if episode % evaluate_every == 0:
        agent.save()  # Save model
        logger.log_performance(env.timestep,
                               tournament(eval_env, evaluate_num)[0])