def divine_current_policy_tag(expt, iter_dir, current_iter):
  start_model_file = os.path.join(iter_dir, 'start.pytorch')
  if not os.path.isfile(start_model_file):
    assert current_iter == 0
    if expt.get('start_model'):
      tag = '%s:%s' % (expt['policy_cls'], expt['start_model'])
      policy = torch_policy.load(tag)
    else:
      policy = torch_policy.load(expt['policy_cls'])
    with open(start_model_file, 'wb') as fd:
      torch.save(policy.state_dict(), fd)
    del policy
  return '%s:%s' % (expt['policy_cls'], start_model_file)
def start(args):
  config.set_cuda(False)
  policy = torch_policy.load(args.policy)

  conf = utils.Ns()
  conf.accept_challenges = True
  conf.formats = ['gen7randombattle', 'gen4randombattle']
  conf.timer = True
  conf.username = username = args.username or utils.random_name()
  conf.host = args.host
  conf.port = args.port
  conf.max_concurrent = 1
  conf.player_ctor = lambda gid: EnginePkmnPlayer(policy, gid)
  conf.pslog_dir = None
  conf.log_dir = 'tmp/pvp'
  conf.wait_time_before_requesting_move_seconds = 0.0

  client = Client(actor.ROOT_SENTINEL, conf)
  print('Setting up %s on %s:%s' % (username, args.host, args.port))
  client.main()
  for i in range(args.num_battles):
    logger.info(client.queue.get())
  client.join()
def start(args):
  logger = utils.default_logger_setup(logging.INFO)
  logger.info('Writing to ' + args.outdir)
  config.set_cuda(args.cuda)

  p1dir = os.path.join(args.outdir, 'p1')
  p2dir = os.path.join(args.outdir, 'p2')
  utils.mkdir_p(p1dir)
  utils.mkdir_p(p2dir)

  prog = os.path.join(
      config.get('showdown_root'),
      config.get('showdown_server_dir'),
      'pokemon-showdown')
  game = Game(options = formats.get(args.format), prog = prog)

  policy_1 = torch_policy.load(args.p1)
  policy_2 = torch_policy.load(args.p2)

  wins = [0, 0]
  logger.info('starting...')
  # TODO: make multithreaded. Maybe integrate this with RL experiment
  for i in range(args.num_matches):
    p1 = EnginePkmnPlayer(policy_1, '%s-p1' % i,
        play_best_move = args.play_best_move in ['p1', 'both'])
    p2 = EnginePkmnPlayer(policy_2, '%s-p2' % i,
        play_best_move = args.play_best_move in ['p2', 'both'])
    game.play(p1, p2)

    for j in [0, 1]:
      player = [p1, p2][j]
      dirname = [p1dir, p2dir][j]
      blogger = battlelogs.BattleLogger(player.gid, dirname)
      for block in player.blocks:
        blogger.log(block)
      blogger.close()

      if player.result == 'winner':
        wins[j] += 1
      else:
        assert player.result in ['loser', 'tie']
  return wins
def main():
  args = parse_args()
  config.set_cuda(False)

  from metagrok import remote_debug
  remote_debug.listen()

  p1_policy = torch_policy.load(args.policy_tag)
  p2_policy = p1_policy
  if args.p2_policy_tag:
    p2_policy = torch_policy.load(args.p2_policy_tag)

  fmt = formats.get(args.fmt)
  game = Game(fmt, '{}/{}/pokemon-showdown'.format(
      config.get('showdown_root'),
      config.get('showdown_server_dir')))

  count = 0
  while True:
    time.sleep(0.1)
    r = sys.stdin.readline().strip()
    if r == 'done':
      break

    battle_dir = os.path.join('/tmp', args.id, '%06d' % count)
    utils.mkdir_p(battle_dir)

    p1 = EnginePkmnPlayer(p1_policy, 'p1', epsilon = args.epsilon)
    p2 = EnginePkmnPlayer(p2_policy, 'p2', epsilon = args.epsilon)
    game.play(p1, p2)

    num_blocks = 0
    for i, player in enumerate([p1, p2]):
      blogger = battlelogs.BattleLogger('p%d' % (i + 1), battle_dir)
      for block in player.blocks:
        blogger.log(block)
        num_blocks += 1
      blogger.close()

    count += 1
    sys.stdout.write('%s\t%d\n' % (battle_dir, num_blocks))
    sys.stdout.flush()
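# Illustrative sketch only: a hypothetical single-worker driver for the loop above,
# showing the stdin/stdout protocol it speaks. Each 'battle\n' written to the worker
# requests one game; the worker answers with a '<battle_dir>\t<num_blocks>' line, and
# 'done\n' shuts it down. The command line mirrors the one spawn_battler uses in
# simulate_and_rollup below; the policy tag, format, and worker id here are placeholders.
import subprocess

def _demo_drive_single_worker(policy_tag, fmt = 'gen7randombattle', num_battles = 2):
  worker = subprocess.Popen(
      ['./rp', 'metagrok/exe/simulate_worker.py', policy_tag, fmt, 'demo-worker'],
      stdin = subprocess.PIPE,
      stdout = subprocess.PIPE,
      encoding = 'utf-8',
      bufsize = 0)
  for _ in range(num_battles):
    # Request one battle and wait for the worker to report where it wrote the logs.
    worker.stdin.write('battle\n')
    battle_dir, num_blocks = worker.stdout.readline().strip().split('\t')
    print('%s finished with %d blocks' % (battle_dir, int(num_blocks)))
  worker.stdin.write('done\n')
  worker.communicate()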
def perform_rollup(expt, iter_dir, policy_tag, parallelism, rollup_fname):
  policy = torch_policy.load(policy_tag)
  rew = expt['reward_args']
  shaper = reward_shaper.create(**rew['shaping'])

  iter_dir_arg = iter_dir
  if expt.get('player'):
    iter_dir_arg = list(utils.find(iter_dir, '%s.jsons.gz' % expt['player']))

  extras = learner.rollup(
      policy,
      iter_dir_arg,
      rew['gamma'],
      rew['lam'],
      shaper,
      num_workers = parallelism,
      progress_type = 'log')

  tmp_rollup_name = '/tmp/rollup.npz'
  np.savez_compressed(tmp_rollup_name, **extras)
  shutil.move(tmp_rollup_name, rollup_fname)

  # 4b. zip up battle files
  subprocess.check_call(['zip', '-q', '-r', '/tmp/battles', 'battles'], cwd = iter_dir)
  shutil.move('/tmp/battles.zip', os.path.join(iter_dir, 'battles.zip'))
  shutil.rmtree(os.path.join(iter_dir, 'battles'))

  return list(extras.values())[0].shape[0]
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('in_file')
  parser.add_argument('specs', nargs='*')
  parser.add_argument('--show-default', action='store_true')
  args = parser.parse_args()

  config.set_cuda(False)

  policies = []
  for t in args.specs:
    name = os.path.splitext(os.path.basename(t.split(':')[-1]))[0]
    policy = torch_policy.load(t)
    policies.append((name, policy))

  lines = []
  switch = 'false'
  found = False
  for line in jsons.stream(args.in_file):
    if len(lines) == 0:
      whoami = line['state']['whoami']
    for update in line['_updates']:
      if update.startswith('|error') or update.startswith('|request'):
        continue
      update = update.strip()
      if update.startswith('|player'):
        _1, _2, pid, pname = update.split('|')[:4]
        if pname == whoami:
          assert not found
          found = True
          switch = 'true' if pid == 'p2' else 'false'
      lines.append(update.strip())
    if 'candidates' in line:
      lines.append(make_table(line, policies, args.show_default))

  assert found
  lines = '\n'.join(lines)
  print(TEMPLATE % (lines, switch))
def start(args):
  logger = utils.default_logger_setup(logging.INFO)
  logger.info('Writing to ' + args.outdir)
  config.set_cuda(args.cuda)

  p1dir = os.path.join(args.outdir, 'p1')
  p2dir = os.path.join(args.outdir, 'p2')
  utils.mkdir_p(p1dir)
  utils.mkdir_p(p2dir)

  prog = os.path.join(
      config.get('showdown_root'),
      config.get('showdown_server_dir'),
      'pokemon-showdown')
  #game = Game(options = formats.get(args.format), prog = prog)

  policy_1 = torch_policy.load(args.p1)
  policy_2 = torch_policy.load(args.p2)

  wins = [0, 0]
  logger.info('starting...')
  # TODO: make multithreaded. Maybe integrate this with RL experiment
  p1_teams, p2_teams = tg.init_lc_thunderdome()
  strategy_agent = team_choice.AgentStrategyProfile(p1_teams, p2_teams)
  for i in range(args.num_matches):
    #team1_ind = team_choice.teamchoice_random(formats.ou_teams)
    team1_ind = strategy_agent.select_action()
    team2_ind = strategy_agent.select_action_p2()
    game = Game(options = strategy_agent.get_teams(team1_ind, team2_ind), prog = prog)
    p1 = EnginePkmnPlayer(policy_1, '%s-p1' % i,
        play_best_move = args.play_best_move in ['p1', 'both'])
    p2 = EnginePkmnPlayer(policy_2, '%s-p2' % i,
        play_best_move = args.play_best_move in ['p2', 'both'])
    game.play(p1, p2)

    for j in [0, 1]:
      player = [p1, p2][j]
      dirname = [p1dir, p2dir][j]
      blogger = battlelogs.BattleLogger(player.gid, dirname)
      for block in player.blocks:
        blogger.log(block)
      blogger.close()

      if j == 0:
        if player.result == 'winner':
          strategy_agent.update(team1_ind, team2_ind, p1_win=True)
        else:
          strategy_agent.update(team1_ind, team2_ind, p1_win=False)

      if player.result == 'winner':
        wins[j] += 1
      else:
        assert player.result in ['loser', 'tie']

  print(strategy_agent.get_utility_matrix())
  with open("lc_thunderdome_results.txt", "w+") as wf:
    for ct, team in enumerate(strategy_agent.p1_teams):
      wf.write("{}\t{}\n".format(ct, team))
    for ct, team in enumerate(strategy_agent.p2_teams):
      wf.write("{}\t{}\n".format(ct, team))
    # wf.write(strategy_agent.get_utility_matrix())
    wf.write("\n")
    wf.flush()
  return wins
def start(args):
  config.set_cuda(False)
  num_matches = args.num_matches
  username = args.username or utils.random_name()
  policy = torch_policy.load(args.spec)

  root_dir = args.root_dir or ('data/evals/%s' % utils.ts())
  proc_log_fname = os.path.join(root_dir, 'debug.log')
  player_log_dir = None
  if args.debug_mode:
    player_log_dir = os.path.join(root_dir, 'player-logs')

  utils.mkdir_p(root_dir)
  if player_log_dir:
    utils.mkdir_p(player_log_dir)

  params = vars(args)
  params['username'] = username
  params['root_dir'] = root_dir
  with open(os.path.join(root_dir, 'config.json'), 'w') as fd:
    json.dump(params, fd)

  logger = utils.default_logger_setup(logging.DEBUG)
  fhandler = logging.handlers.RotatingFileHandler(
      proc_log_fname, maxBytes=16 * 1024 * 1024, backupCount=5)
  fhandler.setFormatter(logging.Formatter(constants.LOG_FORMAT))
  if args.debug_mode:
    fhandler.setLevel(logging.DEBUG)
  else:
    fhandler.setLevel(logging.INFO)
  logger.addHandler(fhandler)

  conf = utils.Ns()
  conf.accept_challenges = False
  conf.formats = [args.format]
  conf.timer = True
  conf.username = username
  conf.host = args.host
  conf.port = args.port
  conf.max_concurrent = args.max_concurrent
  conf.pslog_dir = None
  conf.log_dir = player_log_dir
  conf.wait_time_before_requesting_move_seconds = args.wait_time_before_move

  logger.info('Setting up %s on %s:%s', conf.username, conf.host, conf.port)
  logger.info('Outputting logs to %s', root_dir)

  player_ctor = lambda gid: EnginePkmnPlayer(
      policy, gid, play_best_move=args.play_best_move)

  if args.team:
    with open(args.team) as fd:
      team = fd.read().strip()
  else:
    team = ''

  game = showdown.MatchmakingGame(conf, fmt=args.format, team=team)
  game.main()

  matches = dict((i, game([player_ctor])) for i in range(num_matches))

  count = 0
  record = {'winner': 0, 'loser': 0, 'tie': 0}
  while matches:
    found = False
    for i, msg in matches.items():
      if msg.ready():
        result = msg.get()
        logger.info('Finished %d/%d matches: %s', count + 1, num_matches, result)
        record[result['result']] += 1
        count += 1
        found = True
        break
    if found:
      del matches[i]
    gevent.sleep(1.)

  logger.info('Battles completed! Quitting...')
  params['record'] = record
  logger.info(params['record'])

  game.stop()
  game.join()
  return params
def simulate_and_rollup(expt_name, base_dir, parallelism, cuda):
  logger = logging.getLogger('simulate_and_rollup')
  with open(expt_name) as fd:
    expt = json.load(fd)

  # 1: Figure out which iteration we are running
  current_iter = divine_current_iteration(base_dir)
  iter_dir = os.path.join(base_dir, 'iter%06d' % current_iter)
  utils.mkdir_p(iter_dir)
  logger.info('Current iteration: %d', current_iter)

  # 2: Load the current policy file
  policy_tag = divine_current_policy_tag(expt, iter_dir, current_iter)
  logger.info('Using policy: %s', policy_tag)
  policy = torch_policy.load(policy_tag)

  # do a NaN check here
  for name, param in policy.named_parameters():
    if torch.isnan(param).any().item():
      raise ValueError('Encountered nan in latest model in parameter ' + name)

  rollup_fname = os.path.join(iter_dir, 'rollup.npz')
  assert not os.path.isfile(rollup_fname), 'rollup detected means matches already simulated'

  battles_dir = os.path.join(iter_dir, 'battles')
  utils.mkdir_p(battles_dir)

  num_battles = len([d for d in glob.glob(os.path.join(battles_dir, '*'))
      if len(os.listdir(d)) == 2])
  total_matches = expt['simulate_args']['num_matches']
  num_battles_remaining = total_matches - num_battles
  logger.info('%d battles left to simulate for this iteration', num_battles_remaining)

  if num_battles_remaining:
    start_time = time.time()

    def spawn_battler(bid):
      tag = str(bid)
      logger.info('Spawn battler with ID %s', bid)
      env = os.environ.copy()
      env['OMP_NUM_THREADS'] = '1'
      env['MKL_NUM_THREADS'] = '1'
      err_fd = open('/tmp/%03d.err.log' % bid, 'w')
      args = [
          './rp', 'metagrok/exe/simulate_worker.py',
          policy_tag,
          expt.get('format', 'gen7randombattle'),
          str(bid),
      ]
      if 'epsilon' in expt['simulate_args']:
        args.append('--epsilon')
        args.append(str(expt['simulate_args']['epsilon']))
      if 'p2' in expt['simulate_args']:
        args.append('--p2-policy-tag')
        args.append(str(expt['simulate_args']['p2']))
      rv = subprocess.Popen(
          args,
          stdout = subprocess.PIPE,
          stdin = subprocess.PIPE,
          stderr = err_fd,
          env = env,
          encoding = 'utf-8',
          bufsize = 0,
      )
      os.system('taskset -p -c %d %d' % (bid % mp.cpu_count(), rv.pid))
      return rv, err_fd

    num_blocks = 0
    battle_number = num_battles
    workers, fds = list(zip(*[spawn_battler(i) for i in range(parallelism)]))

    for i in range(num_battles_remaining):
      workers[i % len(workers)].stdin.write('battle\n')

    while battle_number < total_matches:
      time.sleep(0.1)
      for w in workers:
        line = w.stdout.readline().strip()
        if line:
          proc_battle_dir, num_blocks_in_battle = line.split()
          num_blocks_in_battle = int(num_blocks_in_battle)
          num_blocks += num_blocks_in_battle

          battle_dir = os.path.join(battles_dir, '%06d' % battle_number)
          shutil.rmtree(battle_dir, ignore_errors = True)
          shutil.move(proc_battle_dir, battle_dir)
          battle_number += 1

          current_pct = int(100 * battle_number / total_matches)
          prev_pct = int(100 * (battle_number - 1) / total_matches)
          if current_pct > prev_pct:
            logger.info('Battle %s (%s%%) completed. Num blocks: %s',
                battle_number, current_pct, num_blocks_in_battle)

          if battle_number >= total_matches:
            break
      for fd in fds:
        fd.flush()

    for i, w in enumerate(workers):
      logger.info('Shutting down worker %s', i)
      w.stdin.write('done\n')
      w.communicate()
    for fd in fds:
      fd.close()
    for fname in glob.glob('/tmp/*.err.log'):
      os.remove(fname)

    total_time = time.time() - start_time
    logger.info('Ran %d blocks in %ss, rate = %s block/worker/s',
        num_blocks, total_time, float(num_blocks) / len(workers) / total_time)

  logger.info('Rolling up files...')
  rollup_fname = os.path.join(iter_dir, 'rollup.npz')
  num_records = perform_rollup(expt, iter_dir, policy_tag, parallelism, rollup_fname)

  expt_shortname = os.path.splitext(os.path.basename(expt_name))[0]
  return dict(
      a__status = 'Simulations complete',
      dir = iter_dir,
      iter = current_iter + 1,
      name = expt_shortname,
      num_matches = num_battles_remaining,
      num_total_records = num_records,
      subject = 'Experiment log: ' + base_dir,
      z__params = expt,
  )
def perform_policy_update(expt_name, base_dir, parallelism, cuda):
  logger = logging.getLogger('perform_policy_update')
  with open(expt_name) as fd:
    expt = json.load(fd)

  # 1: Figure out which iteration we are running
  current_iter = divine_current_iteration(base_dir)
  iter_dir = os.path.join(base_dir, 'iter%06d' % current_iter)
  utils.mkdir_p(iter_dir)
  logger.info('Current iteration: %d', current_iter)

  # 2: Load the current policy file
  policy_tag = divine_current_policy_tag(expt, iter_dir, current_iter)
  logger.info('Using policy: %s', policy_tag)

  rollup_fname = os.path.join(iter_dir, 'rollup.npz')
  assert os.path.isfile(rollup_fname), 'cannot do policy update without rollup file'

  start_time = time.time()
  all_extras = collections.defaultdict(list)
  for iter_offset in range(expt.get('updater_buffer_length_iters', 1)):
    iter_num = current_iter - iter_offset
    if iter_num >= 0:
      r_fname = os.path.join(base_dir, 'iter%06d' % iter_num, 'rollup.npz')
      logger.info('Loading: %s', r_fname)
      npz = np.load(r_fname)
      for k in npz.files:
        all_extras[k].append(npz[k])

  # This is a hack to save on memory. The optimal solution is to
  #   1) read each rollup to determine array size,
  #   2) pre-allocate a big array, and
  #   3) fill.
  # (metagrok/methods/learner.py does a similar thing but operates on jsons files.)
  extras = {}
  for k, vs in list(all_extras.items()):
    extras[k] = np.concatenate(vs)
    del all_extras[k]
    del vs
  del all_extras

  learner.post_prepare(extras)

  total_time = time.time() - start_time
  logger.info('Loaded rollups in %ss', total_time)

  start_time = time.time()
  logger.info('Starting policy update...')
  extras = {k: torch.from_numpy(v) for k, v in extras.items()}
  extras = TTensorDictDataset(extras, in_place_shuffle = True)

  policy = torch_policy.load(policy_tag)
  updater_cls = utils.hydrate(expt['updater'])
  updater_args = dict(expt['updater_args'])
  for k, v in expt.get('updater_args_schedules', {}).items():
    updater_args[k] = Scheduler(v).select(current_iter)
  updater = updater_cls(policy = policy, **updater_args)

  if config.use_cuda():
    policy.cuda()
  updater.update(extras)
  if config.use_cuda():
    policy.cpu()

  total_time = time.time() - start_time
  logger.info('Ran policy update in %ss', total_time)

  with open('/tmp/end_model_file.pytorch', 'wb') as fd:
    torch.save(policy.state_dict(), fd)
  end_model_file = os.path.join(iter_dir, 'end.pytorch')
  shutil.move('/tmp/end_model_file.pytorch', end_model_file)

  next_iter_dir = os.path.join(base_dir, 'iter%06d' % (current_iter + 1))
  next_start_model_file = os.path.join(next_iter_dir, 'start.pytorch')
  utils.mkdir_p(next_iter_dir)
  shutil.copy(end_model_file, next_start_model_file)
  logger.info('Wrote to %s', next_start_model_file)

  expt_shortname = os.path.splitext(os.path.basename(expt_name))[0]
  return dict(
      a__status = 'Policy update complete',
      dir = iter_dir,
      iter = current_iter + 1,
      name = expt_shortname,
      next_start_model_file = next_start_model_file,
      subject = 'Experiment log: ' + base_dir,
      z__params = expt,
  )
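# Illustrative sketch only: a minimal experiment dict covering the keys that
# divine_current_policy_tag, perform_rollup, simulate_and_rollup, and
# perform_policy_update read above. The key names come from that code; every value,
# dotted class path, and nested-dict content is a hypothetical placeholder, not a
# reference configuration.
EXAMPLE_EXPT = {
    'policy_cls': 'some.module.SomePolicy',          # used to build '<cls>:<weights>' policy tags
    # 'start_model': '/path/to/warm_start.pytorch',  # optional initial weights for iteration 0
    'format': 'gen7randombattle',                    # battle format for simulate_worker (also the default)
    'simulate_args': {
        'num_matches': 1000,                         # battles per iteration
        'epsilon': 0.0,                              # optional, forwarded as --epsilon
        # 'p2': 'some.module.OtherPolicy',           # optional, forwarded as --p2-policy-tag
    },
    'reward_args': {
        'gamma': 0.99,                               # discount passed to learner.rollup
        'lam': 0.95,                                 # lambda passed to learner.rollup
        'shaping': {},                               # kwargs expanded into reward_shaper.create
    },
    # 'player': 'p1',                                # optional: roll up only this player's jsons.gz logs
    'updater': 'some.module.SomeUpdater',            # hydrated via utils.hydrate
    'updater_args': {},                              # constructor kwargs for the updater
    'updater_args_schedules': {},                    # optional per-iteration overrides via Scheduler
    'updater_buffer_length_iters': 1,                # how many past rollups to concatenate
}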