import os

import rlcard
from rlcard.agents import CFRAgent, RandomAgent
from rlcard.utils import set_seed, tournament, Logger, plot_curve


def train(args):
    # Make environments; CFR only supports Leduc Hold'em
    env = rlcard.make('leduc-holdem', config={'seed': 0, 'allow_step_back': True})
    eval_env = rlcard.make('leduc-holdem', config={'seed': 0})

    # Seed numpy, torch, random
    set_seed(args.seed)

    # Initialize CFR Agent
    agent = CFRAgent(env, os.path.join(args.log_dir, 'cfr_model'))
    agent.load()  # If we have a saved model, load it first

    # Evaluate CFR against a random agent
    eval_env.set_agents([agent, RandomAgent(num_actions=env.num_actions)])

    # Start training
    with Logger(args.log_dir) as logger:
        for episode in range(args.num_episodes):
            agent.train()
            print('\rIteration {}'.format(episode), end='')

            # Evaluate the performance. Play with random agents.
            if episode % args.evaluate_every == 0:
                agent.save()  # Save model
                logger.log_performance(env.timestep, tournament(eval_env, args.num_eval_games)[0])

        # Get the paths
        csv_path, fig_path = logger.csv_path, logger.fig_path

    # Plot the learning curve
    plot_curve(csv_path, fig_path, 'cfr')
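# A minimal entry point for the train() function above -- a sketch only.
# The argument names (seed, num_episodes, evaluate_every, num_eval_games,
# log_dir) come straight from the function body; the default values are
# assumptions, not prescribed by the original example.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser('CFR example in RLCard')
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--num_episodes', type=int, default=5000)
    parser.add_argument('--evaluate_every', type=int, default=100)
    parser.add_argument('--num_eval_games', type=int, default=2000)
    parser.add_argument('--log_dir', type=str, default='experiments/leduc_holdem_cfr_result/')

    args = parser.parse_args()
    train(args)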
import os


def load_model(model_path, env=None, position=None, device=None):
    if os.path.isfile(model_path):  # Torch model
        import torch
        agent = torch.load(model_path, map_location=device)
        agent.set_device(device)
    elif os.path.isdir(model_path):  # CFR model
        from rlcard.agents import CFRAgent
        agent = CFRAgent(env, model_path)
        agent.load()
    elif model_path == 'random':  # Random model
        from rlcard.agents import RandomAgent
        agent = RandomAgent(num_actions=env.num_actions)
    else:  # A model in the model zoo
        from rlcard import models
        agent = models.load(model_path).agents[position]

    return agent
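# A usage sketch for load_model(): pit a previously trained CFR policy
# against a random agent. The directory path below is hypothetical -- it is
# wherever agent.save() wrote the model; CFRAgent needs an env created with
# allow_step_back enabled.
import rlcard
from rlcard.utils import tournament

env = rlcard.make('leduc-holdem', config={'seed': 0, 'allow_step_back': True})
cfr_agent = load_model('experiments/leduc_holdem_cfr_result/cfr_model', env=env)
random_agent = load_model('random', env=env)
env.set_agents([cfr_agent, random_agent])
print(tournament(env, 1000))  # Average payoff per seat over 1,000 games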
import rlcard
from rlcard.agents import CFRAgent, RandomAgent
from rlcard.utils import set_global_seed, tournament, Logger


def train_uno():
    # Make environment; CFR needs step_back enabled
    env = rlcard.make('uno', config={'seed': 0, 'allow_step_back': True})
    eval_env = rlcard.make('uno', config={'seed': 0})

    # Set the number of iterations and how frequently we evaluate the
    # performance and save the model
    evaluate_every = 100
    save_plot_every = 1000
    evaluate_num = 10000
    episode_num = 10000

    # The paths for saving the logs and learning curves
    log_dir = './experiments/uno_cfr_result/'

    # Set a global seed
    set_global_seed(0)

    model_path = 'models/uno_cfr'

    # Initialize CFR Agent
    agent = CFRAgent(env, model_path=model_path)
    agent.load()  # If we have a saved model, load it first

    # Evaluate CFR against a random agent
    random_agent = RandomAgent(action_num=eval_env.action_num)
    eval_env.set_agents([agent, random_agent])

    # Init a Logger to plot the learning curve
    logger = Logger(log_dir)

    for episode in range(episode_num):
        agent.train()
        print('\rIteration {}'.format(episode), end='')

        # Evaluate the performance. Play with the random agent.
        if episode % evaluate_every == 0:
            agent.save()  # Save model
            logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])

    # Close files in the logger
    logger.close_files()

    # Plot the learning curve
    logger.plot('CFR')
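# A minimal follow-up sketch (same old RLCard 0.x API as above): reload the
# UNO CFR policy saved under 'models/uno_cfr' and measure its average payoff
# against a random agent. The game count of 1000 is an arbitrary choice.
import rlcard
from rlcard.agents import CFRAgent, RandomAgent
from rlcard.utils import tournament

eval_env = rlcard.make('uno', config={'seed': 0, 'allow_step_back': True})
agent = CFRAgent(eval_env, model_path='models/uno_cfr')
agent.load()  # Restores the tabular policy saved during training
eval_env.set_agents([agent, RandomAgent(action_num=eval_env.action_num)])
print(tournament(eval_env, 1000)[0])  # Average payoff of the CFR seat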
import os

import rlcard
from rlcard.agents import CFRAgent
from rlcard.models.model import Model

# Root of the pretrained model files shipped with RLCard
ROOT_PATH = os.path.join(rlcard.__path__[0], 'models/pretrained')


class LeducHoldemCFRModel(Model):
    ''' A pretrained model on Leduc Hold'em with CFR (chance sampling)
    '''

    def __init__(self):
        ''' Load pretrained model
        '''
        env = rlcard.make('leduc-holdem')
        self.agent = CFRAgent(env, model_path=os.path.join(ROOT_PATH, 'leduc_holdem_cfr'))
        self.agent.load()

    @property
    def agents(self):
        ''' Get a list of agents for each position in the game

        Returns:
            agents (list): A list of agents

        Note: Each agent should be just like an RL agent with step and
            eval_step functioning well.
        '''
        return [self.agent, self.agent]
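# How a Model subclass like this gets exposed: RLCard keeps a model registry,
# and pretrained models are registered by id so they can be pulled back out
# with models.load(). This is a sketch of the convention -- RLCard performs
# the registration itself in rlcard/models/__init__.py, and the exact id
# string here is an assumption.
from rlcard.models.registration import register
from rlcard import models

register(
    model_id='leduc-holdem-cfr',
    entry_point='rlcard.models.pretrained_models:LeducHoldemCFRModel')

cfr_agent = models.load('leduc-holdem-cfr').agents[0]  # Ready for eval_step()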
import rlcard
from rlcard import models
from rlcard.agents import CFRAgent
from rlcard.utils import set_global_seed, tournament, Logger

# Make environment; CFR needs step_back enabled
env = rlcard.make('leduc-holdem', config={'seed': 0, 'allow_step_back': True})
eval_env = rlcard.make('leduc-holdem', config={'seed': 0})

# Set the number of iterations and how frequently we evaluate the
# performance and save the model
evaluate_every = 100
save_plot_every = 1000
evaluate_num = 10000
episode_num = 10000

# The paths for saving the logs and learning curves
log_dir = './experiments/leduc_holdem_cfr_result/'

# Set a global seed
set_global_seed(0)

# Initialize CFR Agent
agent = CFRAgent(env)
agent.load()  # If we have a saved model, load it first

# Evaluate CFR against pre-trained NFSP
eval_env.set_agents([agent, models.load('leduc-holdem-nfsp').agents[0]])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    agent.train()
    print('\rIteration {}'.format(episode), end='')

    # Evaluate the performance. Play with NFSP agents.
    if episode % evaluate_every == 0:
        agent.save()  # Save model
        logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])

# Close files in the logger
logger.close_files()

# Plot the learning curve
logger.plot('CFR')
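# A note on the tournament(...)[0] indexing used throughout these examples:
# tournament() returns one average payoff per seat, and the examples log
# index 0, the CFR agent's seat. A tiny sketch of reading the result:
payoffs = tournament(eval_env, evaluate_num)
print('CFR average payoff vs. NFSP over {} games: {}'.format(evaluate_num, payoffs[0]))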