def collect_training_stats(environment, species, generation):
    training_info = TrainingInfo.load(environment, species)

    # Accumulate cost stats over every batch up to (and including) the batch
    # that trained this generation.
    num_batches_to_train = 0
    wall_clock_time_to_train = 0.0
    cpu_seconds_to_train = 0.0
    mcts_considerations = 0
    if generation > 1:
        for tbatch in training_info.batches:
            num_batches_to_train += 1
            wall_clock_time_to_train += tbatch.self_play_end_time - tbatch.self_play_start_time

            # Utilization is reported from 0.0 to 100.0 for each CPU, so
            # divide by 100 to convert it to CPU-seconds.
            cpu_seconds_to_train += tbatch.self_play_cpu_time / 100.0

            mcts_considerations += tbatch.total_mcts_considerations

            # This is the batch that trained this generation
            if tbatch.generation_trained == generation:
                break

    return dict(
        num_batches_to_train=num_batches_to_train,
        wall_clock_time_to_train=wall_clock_time_to_train,
        cpu_seconds_to_train=cpu_seconds_to_train,
        mcts_considerations=mcts_considerations,
    )
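# Illustrative usage only, not part of the repo: a hedged sketch that prints
# the cumulative cost of reaching a generation. The "connect_four" and
# "gbdt_pcrR2" values are assumptions borrowed from the docstring examples
# elsewhere in this module.
def print_training_cost(environment="connect_four", species="gbdt_pcrR2", generation=5):
    stats = collect_training_stats(environment, species, generation)
    print(
        f"{species} gen {generation}: "
        f"{stats['num_batches_to_train']} batches, "
        f"{stats['wall_clock_time_to_train']:.0f}s wall clock, "
        f"{stats['cpu_seconds_to_train']:.0f} CPU-seconds, "
        f"{stats['mcts_considerations']} MCTS considerations"
    )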
def collate_training_efficiency_stats(environment, tournament_id):
    '''
    :environment ~ "connect_four"
    :tournament_id ~ "1593043900-gbdt_pcrR2-1-21"
        - This is just the basename of the path.
    '''
    bot_figure_stats = []
    tournament_results_path = build_tournament_results_path(tournament_id)
    with open(tournament_results_path, 'r') as f:
        results = json.load(f)

    # Update all the batch info that needs to be updated
    for species in set(x[0] for x in results):
        training_info = TrainingInfo.load(environment, species)
        training_info.update_batch_stats()

    # Collect info for each bot in the tournament
    for species, generation, skill_level, skill_sigma in results:
        gen_info = GenerationInfo.from_generation_info(
            environment,
            species,
            generation,
        )
        tourn_info = TournamentStats(
            environment,
            species,
            generation,
            skill_level,
            skill_sigma,
        )
        bot_figure_stats.append((gen_info, tourn_info))
    return bot_figure_stats
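# A hedged sketch of the tournament results JSON consumed above, inferred
# from the tuple unpacking; the entries and skill values are illustrative,
# not real data.
EXAMPLE_TOURNAMENT_RESULTS = [
    # [species, generation, skill_level, skill_sigma]
    ["gbdt_pcrR2", 21, 24.3, 1.1],
    ["gbdt_pcrR2", 20, 22.8, 1.2],
]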
def update_batch_stats(environment_name, species_name, num_workers=12):
    sys_mon = SystemMonitor()
    tinfo = TrainingInfo.load(environment_name, species_name)
    for batch in tinfo.batches:
        # If batch stats exist, don't redo...
        if batch.self_play_cpu_time is not None:
            print("Batch stats already updated for", batch.batch_num)
            continue

        self_play_cpu_time, num_samples = sys_mon.query_utilization(
            batch.self_play_start_time,
            batch.self_play_end_time,
        )

        # Sanity check that there was a continuous sampling of cpu utilization.
        # - Check that it got at least a sample every 3 seconds
        min_allowable_samples = (batch.self_play_end_time - batch.self_play_start_time) / 3
        if num_samples < min_allowable_samples:
            raise RuntimeError(
                f"Monitoring didn't take enough samples: {num_samples} < {min_allowable_samples}"
            )
        batch.self_play_cpu_time = self_play_cpu_time

        # Grab batch stats from every worker in parallel
        worker_args = []
        for worker_num in range(num_workers):
            worker_args.append(
                (
                    environment_name,
                    species_name,
                    batch.batch_num,
                    worker_num,
                    num_workers,
                )
            )
        with Pool(num_workers) as p:
            results = p.map(run_worker, worker_args)

        batch.num_games = 0
        batch.num_positions = 0
        batch.total_mcts_considerations = 0
        for worker_num, result in enumerate(results):
            print("Finished", worker_num, species_name, batch.batch_num)
            stats = WorkerStats(*result)
            batch.num_games += stats.num_games
            batch.num_positions += stats.num_positions
            batch.total_mcts_considerations += stats.total_mcts_considerations

        # num_games and num_positions are tabulated for both agents (i.e.,
        # double counted); total_mcts_considerations is not.
        batch.num_games = batch.num_games // 2
        batch.num_positions = batch.num_positions // 2

        # Record the batch info
        tinfo.save()
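# A minimal sketch of the per-worker result consumed above, assuming each
# worker returns (num_games, num_positions, total_mcts_considerations) in
# that order; the repo's actual WorkerStats definition may differ.
from collections import namedtuple

WorkerStats = namedtuple(
    "WorkerStats",
    ["num_games", "num_positions", "total_mcts_considerations"],
)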
def collect_figure_data(
    self,
    environment: str,
    species_list: List[str],
):
    data = []  # (species, training_time, generation_number)

    # Update all the batch info that needs to be updated
    for species in species_list:
        training_info = TrainingInfo.load(environment, species)
        training_info.update_batch_stats()

        for generation in range(1, training_info.current_self_play_generation()):
            gen_info = GenerationInfo.from_generation_info(
                environment,
                species,
                generation,
            )
            data.append((species, gen_info.cpu_seconds_to_train, generation))
    return data
def training_stats(environment, species, generation):
    training_info = TrainingInfo.load(environment, species)

    # Accumulate cost stats over every batch up to (and including) the batch
    # that trained this generation.
    num_batches_to_train = 0
    wall_clock_time_to_train = 0.0
    cpu_seconds_to_train = 0.0
    if generation > 1:
        for tbatch in training_info.batches:
            num_batches_to_train += 1

            # Wall clock spans the whole batch, self play through assessment.
            wall_clock_time_to_train += tbatch.assessment_end_time - tbatch.self_play_start_time

            # Self-play CPU only; utilization is reported 0.0-100.0 per CPU,
            # so divide by 100 (same conversion as collect_training_stats).
            cpu_seconds_to_train += tbatch.self_play_cpu_time / 100.0

            # This is the batch that trained this generation
            if tbatch.generation_trained == generation:
                break

    return dict(
        num_batches_to_train=num_batches_to_train,
        wall_clock_time_to_train=wall_clock_time_to_train,
        cpu_seconds_to_train=cpu_seconds_to_train,
    )
@classmethod
def build(cls, environment, species):
    ti = TrainingInfo.load(environment, species)
    generations = [x.generation_trained for x in ti.batches if x.generation_trained]

    rows = []
    for generation in generations:
        for model_type in ("value", "policy"):
            model_directory = build_model_directory(environment, species, generation)
            info_path = f"{model_directory}/{model_type}_model_training_info_{generation:06d}.json"
            info = GBDTTrainingInfo.load(info_path)
            for eval_stat in info.eval_stats:
                rows.append(
                    EvalData(
                        environment=environment,
                        species=species,
                        generation=generation,
                        model_type=model_type,
                        dataset=eval_stat.dataset,
                        metric=eval_stat.metric,
                        iteration=eval_stat.iteration,
                        value=eval_stat.value,
                    )
                )
    return cls(rows=rows)
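# A minimal sketch of the row record built above, assuming EvalData is a
# plain container with exactly these fields; not the repo's actual definition.
from dataclasses import dataclass

@dataclass
class EvalData:
    environment: str
    species: str
    generation: int
    model_type: str  # "value" or "policy"
    dataset: str
    metric: str
    iteration: int
    value: float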
def run(
    environment,
    species_name,
    num_batches,
    num_workers=settings.SELF_PLAY_THREADS,
    adjusted_win_rate_threshold=0.50,
    num_assessment_games=200,
):
    # Will play at least num_workers games per round
    num_faceoff_rounds = math.ceil(num_assessment_games / num_workers)

    training_info = TrainingInfo.load(environment, species_name)
    final_training_batch = len(training_info.batches) + num_batches
    for _ in range(num_batches):
        current_batch = len(training_info.batches) + 1
        generation_self_play = training_info.current_self_play_generation()
        generation_training = generation_self_play + 1
        species = get_species(species_name)

        print(f"\n\nBatch {current_batch} / {final_training_batch}")
        print(f"environment: {environment}, species: {species_name}")
        print(f"self-play generation: {generation_self_play}")

        # Ensure directories are made/etc.
        # - Not sure this actually depends on generation, but maybe it will later.
        setup_filesystem(
            environment,
            species_name,
            generation_self_play,
        )

        # Self-play another batch
        games_per_batch = species.self_play_settings(environment, generation_self_play)["num_games"]
        print(f"\n\nSelf Play ({games_per_batch} cycles)")
        self_play_start_time = time.time()
        run_self_play(
            environment,
            species_name,
            generation_self_play,
            games_per_batch,
            current_batch,
            num_workers,
        )
        self_play_end_time = time.time()
        elapsed = round(self_play_end_time - self_play_start_time, 1)
        cycles_per_second = round(games_per_batch / elapsed, 1)
        print(f"\nSelf play finished in {elapsed} seconds")
        print(f"Cycles ran: {games_per_batch}, cycles per sec: {cycles_per_second}")

        # Train new model
        print("\n\nTraining")
        training_start_time = time.time()
        run_model_training(
            environment,
            species_name,
            generation_training,
            current_batch,
            num_workers,
        )
        training_end_time = time.time()
        elapsed = round(training_end_time - training_start_time, 1)
        print(f"\nTrained new models in {elapsed} seconds")

        # Assess new model
        print("\n\nAssessing")
        assessment_start_time = time.time()
        contender_matchup_info = run_faceoff(
            environment,
            species_name,
            generation_training,
            num_rounds=num_faceoff_rounds,
            num_workers=num_workers,
        )
        assessment_end_time = time.time()
        elapsed = round(assessment_end_time - assessment_start_time, 1)
        print(f"\nAssessed new model in {elapsed} seconds")

        adjusted_win_rate = contender_matchup_info.win_rate(draw_weight=0.5)
        print("Adjusted Win Rate:", round(adjusted_win_rate, 3))

        # Promote the contender only if it beat the incumbent decisively enough
        generation_trained = None
        if adjusted_win_rate >= adjusted_win_rate_threshold:
            generation_trained = generation_training
            print("FOUND NEW BOT:", generation_trained)

        training_info.finalize_batch(
            self_play_start_time=self_play_start_time,
            self_play_end_time=self_play_end_time,
            training_start_time=training_start_time,
            training_end_time=training_end_time,
            assessment_start_time=assessment_start_time,
            assessment_end_time=assessment_end_time,
            generation_self_play=generation_self_play,
            generation_trained=generation_trained,
            assessed_awr=adjusted_win_rate,
        )
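# Illustrative entry point, not from the repo: a hedged sketch assuming the
# docstring-style names used above ("connect_four", "gbdt_pcrR2"). Runs ten
# self-play/train/assess batches with the default promotion threshold.
if __name__ == "__main__":
    run(
        environment="connect_four",
        species_name="gbdt_pcrR2",
        num_batches=10,
    )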