def test_config_channel_last(): man = get_manager() for game in games: # create GenerationDescription generation_descr = templates.default_generation_desc(game) generation_descr.channel_last = True # lookup game in manager transformer = man.get_transformer(game, generation_descr) print transformer.x_cords # look game from database game_info = lookup.by_name(game) assert game == game_info.game sm = game_info.get_sm() basestate = sm.get_initial_state() print "rows x cols", transformer.num_rows, transformer.num_cols print print transformer.state_to_channels(basestate.to_list()) basestate = advance_state(game_info.get_sm(), basestate) print print transformer.state_to_channels(basestate.to_list())
def server_config_template(game, generation_prefix, prev_states):
    """Build a ServerConfig for `game` populated with default settings.

    `prev_states` is forwarded into the generation description as the
    number of previous states the network sees.
    """
    from ggpzero.nn.manager import get_manager

    cfg = confs.ServerConfig()
    cfg.game = game
    cfg.generation_prefix = generation_prefix
    cfg.port = 9000
    cfg.current_step = 0
    cfg.num_samples_to_train = 20000
    cfg.max_samples_growth = 0.8

    # the description drives the transformer / network shapes below
    cfg.base_generation_description = default_generation_desc(
        game, generation_prefix,
        multiple_policy_heads=True,
        num_previous_states=prev_states)

    transformer = get_manager().get_transformer(game, cfg.base_generation_description)
    cfg.base_network_model = nn_model_config_template(game, "smaller", transformer)
    cfg.base_training_config = train_config_template(game, generation_prefix)
    cfg.self_play_config = selfplay_config_template()

    return cfg
def test_speed_chess_network_sizes():
    """Build speedChess networks across several generation descriptions.

    Iterates a single description object that is progressively mutated
    (more previous states, then multiple policy heads) and creates a
    "small" network for each variant.
    """
    game = "speedChess"

    # ensure the game loads from the database (the state machine itself
    # is not needed by this test, only that it can be created)
    game_info = lookup.by_name(game)
    game_info.get_sm()

    def get_generation_descr():
        # NOTE: yields the SAME description object each time, mutated in place
        generation_descr = templates.default_generation_desc(game)
        generation_descr.num_previous_states = 0
        yield generation_descr
        generation_descr.num_previous_states = 2
        yield generation_descr
        generation_descr.num_previous_states = 30
        yield generation_descr
        generation_descr.multiple_policy_heads = True
        yield generation_descr

    man = get_manager()
    for descr in get_generation_descr():
        # called for its side effects (manager caches the transformer);
        # the return value was previously bound but never used
        man.get_transformer(game, descr)
        nn = man.create_new_network(game, "small", descr)
        nn.summary()
def test_tron(): import py.test py.test.skip("WIP") game = "tron_10x10" generation = "test_1" man = get_manager() # create a nn model_conf = templates.nn_model_config_template(game) generation_descr = templates.default_generation_desc( game, generation, multiple_policy_heads=True) nn = man.create_new_network(game, model_conf, generation_descr) game_info = lookup.by_name(game) sm = game_info.get_sm() basestate = sm.get_initial_state() policy, scores = nn.predict_1(basestate.to_list()) print policy, scores res = nn.predict_n([basestate.to_list(), basestate.to_list()]) assert len(res) == 2 and len(res[0]) == 2 and len(res[1]) == 2 print policy, scores
def test_config_previous_states(): man = get_manager() for game in games: # create GenerationMetaAttributes generation_descr = templates.default_generation_desc(game) generation_descr.num_previous_states = 2 # lookup game in manager transformer = man.get_transformer(game, generation_descr) print transformer.x_cords # look game from database game_info = lookup.by_name(game) assert game == game_info.game sm = game_info.get_sm() basestate0 = sm.get_initial_state() basestate1 = advance_state(game_info.get_sm(), basestate0) basestate2 = advance_state(game_info.get_sm(), basestate1) print "rows x cols", transformer.num_rows, transformer.num_cols print "num_channels", transformer.num_channels print "basestate0:" print transformer.state_to_channels(basestate0.to_list(), []) print "basestate1:" print transformer.state_to_channels(basestate1.to_list(), [basestate0.to_list()]) print "basestate2:" print transformer.state_to_channels( basestate2.to_list(), [basestate1.to_list(), basestate0.to_list()])
def test_basic_config(): man = get_manager() for game in games: # look game from database game_info = lookup.by_name(game) assert game == game_info.game sm = game_info.get_sm() basestate = sm.get_initial_state() # lookup game in manager transformer = man.get_transformer(game) print "rows x cols", transformer.num_rows, transformer.num_cols print transformer.x_cords print transformer.y_cords basestate = advance_state(game_info.get_sm(), basestate) print "1" print "=" * 50 print transformer.state_to_channels(basestate.to_list()) print "2" print "=" * 50 basestate = advance_state(game_info.get_sm(), basestate, do_swap=True) print transformer.state_to_channels(basestate.to_list()) for ii in range(20): basestate = advance_state(game_info.get_sm(), basestate) print "3" print "=" * 50 print transformer.state_to_channels(basestate.to_list())
def do_transformer(num_previous_states):
    """Benchmark the C++ transformer and network prediction for breakthrough.

    Runs 10 iterations; each iteration converts states via the C++
    transformer, reshapes the result, and feeds it to the network.
    Prints cumulative timings for the three stages at the end.

    :param num_previous_states: history size for the generation description.
    """
    game = "breakthrough"
    game_info = lookup.by_name(game)
    sm = game_info.get_sm()
    man = get_manager()

    # only multiple_policy_heads supported in c++
    generation_descr = templates.default_generation_desc(
        game,
        multiple_policy_heads=True,
        num_previous_states=num_previous_states)

    t = man.get_transformer(game, generation_descr)

    # create transformer wrapper object (C++ side)
    c_transformer = cppinterface.create_c_transformer(t)

    nn = man.create_new_network(game, "small", generation_descr)
    verbose = True

    total_predictions = 0
    # cumulative wall-clock totals for: transform / reshape / predict
    total_s0 = 0
    total_s1 = 0
    total_s2 = 0
    for ii in range(10):
        print ii
        start = time.time()
        array = c_transformer.test(cppinterface.sm_to_ptr(sm))
        total_s0 += time.time() - start

        # flat buffer -> number of samples it contains
        sz = len(array) / (t.num_channels * t.channel_size)
        total_predictions += sz

        array = array.reshape(sz, t.num_channels, t.num_cols, t.num_rows)
        total_s1 += time.time() - start

        if verbose:
            np.set_printoptions(threshold=np.inf, formatter={'float_kind': float_formatter0})
            print array

        # test we can actually predict
        res = nn.get_model().predict(array, batch_size=sz)
        # print res[0].shape
        # print res[1].shape
        total_s2 += time.time() - start

        if verbose:
            np.set_printoptions(threshold=np.inf, formatter={'float_kind': float_formatter1})
            print res

    # NOTE: each total includes the preceding stages (timers share `start`),
    # so the three numbers are cumulative, not independent.
    print total_predictions, "time taken", [
        s * 1000 for s in (total_s0, total_s1, total_s2)
    ]
def setup():
    """One-time test environment initialization (ggplib, keras/tf, manager).

    Order matters here: ggplib before the database, keras/tf before the
    manager is touched.
    """
    # set up ggplib
    setup_once()

    # ensure we have database with ggplib
    lookup.get_database()

    # initialise keras/tf
    keras.init()

    # just ensures we have the manager ready
    get_manager()

    # quieten tensorflow logging; NOTE(review): TF_CPP_MIN_LOG_LEVEL is set
    # after keras.init() — presumably TF was already imported, so the env
    # var may have no effect at this point; confirm intended ordering.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf.logging.set_verbosity(tf.logging.ERROR)

    np.set_printoptions(threshold=100000, precision=2)
def reversi_10(self, filename="../data/elo/r10.elo"):
    """Run an ELO tournament for reversi 10x10 over all loadable generations.

    Scans several generation prefixes for networks the manager can load,
    builds a player per generation plus baseline players, and hands the
    lot to gen_elo, writing results to `filename`.
    """
    man = manager.get_manager()

    from ggpzero.battle.reversi import MatchInfo10
    match_info = MatchInfo10()

    # baseline players: random, MCS and simple-MCTS
    random_player = get_player("r", MOVE_TIME)
    mcs_player = get_player("m", MOVE_TIME, max_iterations=800)
    simplemcts_player = get_player("s", MOVE_TIME, max_tree_playout_iterations=800)
    all_players = [random_player, mcs_player, simplemcts_player]

    def dp(g, playouts, v):
        # player factory for a given generation name
        return define_player("r10", g, playouts, v,
                             dirichlet_noise_pct=0.15,
                             depth_temperature_stop=6,
                             depth_temperature_start=6,
                             max_dump_depth=1,
                             random_scale=0.9)

    # note x1_7x was Scan first match
    # note x2_119 (or 121) was Scan second match (i think)
    # retrained new_x2_174... not sure what x2 state was in...
    # going to aggregate x2 and h3 and see if total makes stronger
    # @ 192 - massive jump in starting step 25 -> 83.
    # @ 211 - another jump, starting step 83 -> 100
    # @ 211 - crazy add change to neutralise policy pcts
    # current: x2_224 - assuming this was Scan 3rd match

    # collect loadable generations: start at `num`, step by `incr`,
    # stop at the first gap
    gens = []
    for name, num, incr in (["x1", 5, 10], ["x2", 49, 10],
                            ['h5', 20, 10], ["kt1", 3, 5]):
        while True:
            gen = "%s_%s" % (name, num)
            if not man.can_load("reversi_10x10", gen):
                print "FAILED TO LOAD GEN", gen
                break
            gens.append(gen)
            num += incr

    # ensure this one
    gens.append("x2_224")

    all_players += [dp(g, 800, 3) for g in gens]
    all_players.append(dp("h5_100", 800, 1))

    gen_elo(match_info, all_players, filename)
def speed_test(): ITERATIONS = 3 man = get_manager() # get data train_config = config() # get nn to test speed on transformer = man.get_transformer(train_config.game) trainer = train.TrainManager(train_config, transformer) nn_model_config = templates.nn_model_config_template(train_config.game, "small", transformer) generation_descr = templates.default_generation_desc(train_config.game) trainer.get_network(nn_model_config, generation_descr) data = trainer.gather_data() res = [] batch_size = 4096 sample_count = len(data.inputs) keras_model = trainer.nn.get_model() # warm up for i in range(2): idx, end_idx = i * batch_size, (i + 1) * batch_size print i, idx, end_idx inputs = np.array(data.inputs[idx:end_idx]) res.append(keras_model.predict(inputs, batch_size=batch_size)) print res[0] for _ in range(ITERATIONS): res = [] times = [] gc.collect() print 'Starting speed run' num_batches = sample_count / batch_size + 1 print "batches %s, batch_size %s, inputs: %s" % (num_batches, batch_size, len(data.inputs)) for i in range(num_batches): idx, end_idx = i * batch_size, (i + 1) * batch_size inputs = np.array(data.inputs[idx:end_idx]) print "inputs", len(inputs) s = time.time() Y = keras_model.predict(inputs, batch_size=batch_size) times.append(time.time() - s) print "outputs", len(Y[0]) print "times taken", times print "total_time taken", sum(times) print "predictions per second", sample_count / float(sum(times))
def setup():
    """One-time test environment initialization, plus hacked-game install.

    Order matters: ggplib first, then the database, then keras/tf, then
    the manager; finally the extra gzero games are registered.
    """
    # set up ggplib
    setup_once()

    # ensure we have database with ggplib
    lookup.get_database()

    # initialise keras/tf
    keras.init()

    # just ensures we have the manager ready
    get_manager()

    # quieten tensorflow logging; NOTE(review): setting TF_CPP_MIN_LOG_LEVEL
    # after keras.init() may be too late to affect the C++ logger — confirm.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf.logging.set_verbosity(tf.logging.ERROR)

    np.set_printoptions(threshold=100000)

    # register the additional (hacked-in) games with ggplib
    from gzero_games.ggphack import addgame
    addgame.install_games()
def connect6(self, filename="../data/elo/connect6.elo"):
    """Run an ELO tournament for connect6 over all loadable generations.

    Scans the h1 / h2 generation series for loadable networks, appends a
    hand-picked set of extra generations, and runs gen_elo with the
    connect6 move generator, writing results to `filename`.
    """
    from ggpzero.battle.connect6 import MatchInfo
    match_info = MatchInfo()

    def dp(g, playouts, v):
        # player factory for a given generation name
        return define_player("connect6", g, playouts, v,
                             max_dump_depth=1,
                             dirichlet_noise_pct=0.15)

    # random = 500 elo
    random_player = get_player("r", MOVE_TIME)
    mcs_player = get_player("m", MOVE_TIME, max_iterations=800)
    simplemcts_player = get_player("s", MOVE_TIME, max_tree_playout_iterations=800)
    all_players = [random_player, mcs_player, simplemcts_player]

    man = manager.get_manager()

    # h1 series: every 10th generation from 5 until the first gap
    num = 5
    gens = []
    while True:
        gen = "h1_%s" % num
        if not man.can_load("connect6", gen):
            break
        gens.append(gen)
        num += 10

    # h2 series: every 5th generation from 145 until the first gap
    num = 145
    while True:
        gen = "h2_%s" % num
        if not man.can_load("connect6", gen):
            break
        gens.append(gen)
        num += 5

    # hand-picked extra generations
    gens += ["h1_183"]
    gens += [
        "h2_281", "h2_267", "h2_272", "h2_274", "h2_277",
        "h2_306", "h2_318", "h2_321"
    ]

    all_players += [dp(g, 800, 3) for g in gens]

    gen_elo(match_info, all_players, filename, move_generator=move_generator_c6)
def __init__(self, transformer, gen_prefix, do_augment_data=False):
    """Bind to the sample data for `transformer.game` under `gen_prefix`.

    Resolves the data directory via the manager, then builds and saves
    the generation-data summary straight away.
    """
    self.transformer = transformer
    self.gen_prefix = gen_prefix
    self.do_augment_data = do_augment_data

    # where this generation's sample files live
    self.data_path = get_manager().samples_path(transformer.game, gen_prefix)
    self.summary_path = os.path.join(self.data_path, "gendata_summary.json")

    # build the summary and persist it immediately
    self.summary = self.get_summary()
    self.save_summary_file()

    bcolz.set_nthreads(4)
def test_cache(): buf = train.SamplesBuffer() # we need the data for this test conf = get_conf() # create a transformer man = get_manager() generation_descr = templates.default_generation_desc(conf.game) generation_descr.num_previous_states = 0 generation_descr.multiple_policy_heads = False transformer = man.get_transformer(conf.game) run_with_transformer(buf, conf, transformer)
def test_net_different_net_sizes(): man = get_manager() for game in games: # create a nn for these sizes for size in "tiny smaller small medium-small medium medium-large large larger massive".split( ): print print size nn = man.create_new_network(game, size) nn.summary() print print game, size
def bt8(self, filename="../data/elo/bt8.elo"): from ggpzero.battle.bt import MatchInfo match_info = MatchInfo(8) def dp(g, playouts, v): return define_player("bt8", g, playouts, v, depth_temperature_stop=4, depth_temperature_start=4, random_scale=0.5) # 3 models ran on LG all_players = [dp(g, 800, 3) for g in ("x6_90", "x6_96", "x6_102", "x6_106", "x6_111", "x6_116", "x6_123", "x6_127", "x6_132", "x6_139", "x6_145", "x6_151", "x6_158", "x6_163", "x6_171", "x6_177")] kt_gens = ["kt1_1", "kt1_2", "kt1_3", "kt1_4", "kt1_5", "kt1_7"] man = manager.get_manager() gens = [] for name, num, incr in (["kt1", 10, 4], ["kt3", 2, 3], ["kt5", 2, 10], ["f1", 1, 5], ["az1", 2, 3]): while True: gen = "%s_%s" % (name, num) if not man.can_load("breakthrough", gen): print "FAILED TO LOAD GEN", gen break gens.append(gen) num += incr all_players += [dp(g, 800, 3) for g in gens] random_player = get_player("r", MOVE_TIME) mcs_player = get_player("m", MOVE_TIME, max_iterations=800) simplemcts_player = get_player("s", MOVE_TIME, max_tree_playout_iterations=800) all_players += [random_player, mcs_player, simplemcts_player] gen_elo(match_info, all_players, filename)
def game_test(game, pretty_board, advance_state_count):
    """Verify symmetry translations of a game state agree across implementations.

    Advances the game `advance_state_count` moves, then for every
    reflection/rotation in the symmetry prescription checks that the fast
    and slow basestate translators agree, and that translating the legal
    moves matches the legal moves of the translated state.

    :param game: game name to look up in the database.
    :param pretty_board: callable printing the board from a state machine.
    :param advance_state_count: number of random moves to play first.
    """
    # game stuff
    info = lookup.by_name(game)
    transformer = get_manager().get_transformer(game)

    # the translator
    t = sym.create_translator(info,
                              transformer.game_desc,
                              transformer.get_symmetries_desc())

    # start with a statemachine - and advance `advance_state_count` moves
    sm = info.get_sm()
    sm.reset()

    basestate = sm.get_initial_state()
    for i in range(advance_state_count):
        basestate = advance_state(sm, basestate)

    sm.update_bases(basestate)

    # print board & moves
    print "original board:"
    pretty_board(sm)

    prescription = sym.Prescription(transformer.get_symmetries_desc())

    translated_basestate = sm.new_base_state()

    # do all reflections / rotations in prescription
    for do_reflection, rot_count in prescription:
        print "reflection", do_reflection, "rotations", rot_count

        # translate state/moves; both translator paths must agree
        basestate_list = t.translate_basestate(basestate.to_list(),
                                               do_reflection, rot_count)
        basestate2_list = t.translate_basestate_faster(basestate.to_list(),
                                                       do_reflection, rot_count)
        assert basestate_list == basestate2_list

        translated_moves = translate_moves(sm, basestate, t, do_reflection, rot_count)

        # translated legal moves == legal moves of the translated state
        translated_basestate.from_list(basestate_list)
        assert all_moves(sm, translated_basestate) == translated_moves

        sm.update_bases(translated_basestate)

        # print board & moves
        pretty_board(sm)

        for role, moves in all_moves(sm, translated_basestate):
            print role, moves
def __init__(self, transformer, gen_prefix, do_augment_data=False,
             data_augment_pct=1.0, score_draw_as_random_hack=False):
    """Bind to the sample data for `transformer.game` under `gen_prefix`.

    Resolves the data directory via the manager, then builds and saves
    the generation-data summary straight away.  Augmentation behaviour
    is controlled by `do_augment_data` / `data_augment_pct`;
    `score_draw_as_random_hack` toggles the draw-scoring workaround.
    """
    self.transformer = transformer
    self.gen_prefix = gen_prefix

    # data-augmentation knobs
    self.do_augment_data = do_augment_data
    self.data_augment_pct = data_augment_pct
    self.score_draw_as_random_hack = score_draw_as_random_hack

    # where this generation's sample files live
    self.data_path = get_manager().samples_path(transformer.game, gen_prefix)
    self.summary_path = os.path.join(self.data_path, "gendata_summary.json")

    # build the summary and persist it immediately
    self.summary = self.get_summary()
    self.save_summary_file()

    bcolz.set_nthreads(4)
def train(self, game, train_config, network_model, generation_description):
    """Train a network for `game`, creating the TrainManager on first use.

    Subsequent calls reuse the existing trainer; the config is refreshed
    on every call before training and saving.
    """
    assert train_config.game == game

    if self.trainer is None:
        # first call: build the transformer and the training manager
        transformer = get_manager().get_transformer(game, generation_description)
        self.trainer = TrainManager(train_config, transformer,
                                    do_data_augmentation=True)

    self.trainer.update_config(train_config)
    self.trainer.get_network(network_model, generation_description)

    self.trainer.do_epochs()
    self.trainer.save()
def hex19(self, filename="../data/elo/hex19.elo"):
    """Run an ELO tournament for hex 19x19 (new hex C++ state machine).

    Scans several generation series (bounded by `maxg`) for loadable
    networks, adds a hand-picked set plus baseline players, and runs
    gen_elo, writing results to `filename`.
    """
    from ggpzero.battle import hex2
    man = manager.get_manager()
    match_info = hex2.MatchInfo(19)

    random_player = get_player("r", MOVE_TIME)

    # 1 second
    simplemcts_player = get_player("s", 1.0)
    all_players = [random_player, simplemcts_player]

    # collect loadable generations: start at `num`, step by `incr`,
    # stop at the first gap or when `maxg` is exceeded
    gens = []
    for name, num, incr, maxg in (["h1", 258, 7, 360], ["h1", 361, 10, 489],
                                  ["h1", 496, 6, 700], ["h2", 255, 6, 500],
                                  ["t1", 5, 5, 100]):
        while True:
            gen = "%s_%s" % (name, num)
            if not man.can_load("hex_lg_19", gen):
                print "FAILED TO LOAD GEN", gen
                break
            gens.append(gen)
            num += incr
            if num > maxg:
                break

    def dp(g, playouts, v):
        # player factory for a given generation name
        return define_player("hex_lg_19", g, playouts, v,
                             dirichlet_noise_pct=0.15,
                             depth_temperature_increment=1.0,
                             depth_temperature_max=10.0,
                             depth_temperature_stop=8,
                             depth_temperature_start=1,
                             max_dump_depth=1,
                             temperature=1.0,
                             random_scale=0.8)

    # hand-picked extra generations
    gens += ["lalal_456", "lalal_490", "lalal_603"]
    gens += ["yy_291", "halfpol_291"]

    all_players += [dp(g, 800, 3) for g in gens]

    gen_elo(match_info, all_players, filename)
def test_cittaceot(): man = get_manager() game = "cittaceot" # create a nn nn = man.create_new_network(game, "tiny") game_info = lookup.by_name(game) sm = game_info.get_sm() basestate = sm.get_initial_state() predictions = nn.predict_1(basestate.to_list()) print predictions.policies, predictions.scores predictions = nn.predict_n([basestate.to_list(), basestate.to_list()]) assert len(predictions) == 2 and len(predictions[0].policies) == 2 and len( predictions[0].scores) == 2 print predictions
def setup_c4(batch_size=1024):
    """Create a connectFour C++ supervisor and a template self-play config.

    Returns (supervisor, conf).
    """
    game_name = "connectFour"
    manager = get_manager()

    # only multiple_policy_heads supported in c++
    descr = templates.default_generation_desc(
        game_name, multiple_policy_heads=True)
    network = manager.create_new_network(game_name, "small", descr)

    info = lookup.by_name(game_name)
    supervisor = cppinterface.Supervisor(info.get_sm(), network,
                                         batch_size=batch_size)

    conf = templates.selfplay_config_template()
    return supervisor, conf
def chess_15d(self, filename="../data/elo/chess_15d.elo"):
    """Run an ELO tournament for chess_15d over all loadable generations.

    Scans several generation prefixes for loadable networks, adds the
    random baseline, and runs gen_elo, writing results to `filename`.
    """
    def dp(g, playouts, v):
        # player factory for a given generation name.
        # NOTE(review): players are defined against "c_15f" while networks
        # are loaded for "chess_15d" — presumably an intentional alias used
        # by define_player; confirm.
        return define_player("c_15f", g, playouts, v,
                             dirichlet_noise_pct=0.15,
                             depth_temperature_stop=6,
                             depth_temperature_start=6,
                             max_dump_depth=1,
                             evaluation_multiplier_to_convergence=2.0,
                             batch_size=8,
                             noise_policy_squash_pct=0.75,
                             noise_policy_squash_prob=0.1,
                             fpu_prior_discount_root=0.1,
                             fpu_prior_discount=0.2,
                             random_scale=0.6)

    man = manager.get_manager()

    from ggpzero.battle import chess
    match_info = chess.MatchInfo(short_50=True)

    # baseline players (mcs/simplemcts are built but currently excluded
    # from the tournament — see the commented list below)
    random_player = get_player("r", MOVE_TIME)
    mcs_player = get_player("m", MOVE_TIME, max_iterations=800)
    simplemcts_player = get_player("s", MOVE_TIME, max_tree_playout_iterations=800)
    all_players = [random_player]
    # mcs_player , simplemcts_player

    # collect loadable generations: start at `num`, step by `incr`,
    # stop at the first gap
    gens = []
    for name, num, incr in (["c1", 5, 7], ["kb1", 3, 5], ["c2", 145, 5]):
        while True:
            gen = "%s_%s" % (name, num)
            if not man.can_load("chess_15d", gen):
                print "FAILED TO LOAD GEN", gen
                break
            gens.append(gen)
            num += incr

    gens.append("c2_367")

    all_players += [dp(g, 800, 3) for g in gens]

    gen_elo(match_info, all_players, filename)
def setup():
    """Initialize ggplib + keras/tf and ensure a random baseline network exists.

    If no network can be loaded for (GAME, RANDOM_GEN), a fresh untrained
    network is created and saved under that generation name.
    """
    import tensorflow as tf

    from ggplib.util.init import setup_once
    setup_once()

    from ggpzero.util.keras import init
    init()

    # quieten tensorflow logging; NOTE(review): setting TF_CPP_MIN_LOG_LEVEL
    # after init() may be too late to affect the C++ logger — confirm.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf.logging.set_verbosity(tf.logging.ERROR)

    import numpy as np
    np.set_printoptions(threshold=100000)

    man = get_manager()

    # create and save an (untrained) network to act as the random generation
    if not man.can_load(GAME, RANDOM_GEN):
        network = man.create_new_network(GAME)
        man.save_network(network, RANDOM_GEN)
def files_to_sample_data(self, conf):
    """Yield (filename, SamplesData) for each generation step, newest first.

    Walks steps from conf.next_step - 1 down to conf.starting_step
    (negative starting_step counts back from the newest step).  Parsed
    files are memoized in self.sample_data_cache so repeated training
    runs don't re-read the gzipped json.

    :param conf: a confs.TrainNNConfig instance.
    """
    man = get_manager()

    assert isinstance(conf, confs.TrainNNConfig)

    # newest step with data
    step = conf.next_step - 1

    starting_step = conf.starting_step
    # negative means "this many steps back from the newest", clamped at 0
    if starting_step < 0:
        starting_step = max(step + starting_step, 0)

    while step >= starting_step:
        store_path = man.samples_path(conf.game, conf.generation_prefix)
        fn = os.path.join(store_path, "gendata_%s_%s.json.gz" % (conf.game, step))

        if fn not in self.sample_data_cache:
            raw_data = attrutil.json_to_attr(gzip.open(fn).read())
            data = SamplesData(raw_data.game,
                               raw_data.with_generation,
                               raw_data.num_samples)

            # report the fraction of draws (final score ~ 0.5) for this file
            total_draws = 0
            for s in raw_data.samples:
                if abs(s.final_score[0] - 0.5) < 0.01:
                    total_draws += 1

            draws_ratio = total_draws / float(len(raw_data.samples))
            log.info("Draws ratio %.2f" % draws_ratio)

            for s in raw_data.samples:
                data.add_sample(s)

            if len(data.samples) != data.num_samples:
                # pretty inconsequential, but we should at least notify
                msg = "num_samples (%d) versus actual samples (%s) differ... trimming"
                log.warning(msg % (data.num_samples, len(data.samples)))

                data.num_samples = min(len(data.samples), data.num_samples)
                data.samples = data.samples[:data.num_samples]

            self.sample_data_cache[fn] = data

        yield fn, self.sample_data_cache[fn]

        step -= 1
def get_network(self, nn_model_config, generation_descr):
    """Load the previous generation's network for retraining, or create a new one.

    Sets self.nn and self.retraining.  Raises TrainException when the
    target generation already exists and overwriting is disabled.
    """
    # abbreviate, easier on the eyes
    conf = self.train_config

    attrutil.pprint(nn_model_config)

    man = get_manager()

    # refuse to clobber an existing generation unless explicitly allowed
    if man.can_load(conf.game, self.next_generation):
        msg = "Generation already exists %s / %s" % (conf.game, self.next_generation)
        log.error(msg)
        if not conf.overwrite_existing:
            raise TrainException("Generation already exists %s / %s" % (
                conf.game, self.next_generation))

    nn = None
    retraining = False
    if conf.use_previous:
        # default to next_generation_prefix, otherwise use conf.generation_descr
        candidates = [self.next_generation_prefix]
        if conf.generation_prefix != self.next_generation_prefix:
            candidates.append(conf.generation_prefix)

        # try each candidate prefix at the previous step; first hit wins
        for gen in candidates:
            prev_generation = "%s_%s" % (gen, conf.next_step - 1)

            if man.can_load(conf.game, prev_generation):
                log.info("Previous generation found: %s" % prev_generation)
                nn = man.load_network(conf.game, prev_generation)
                retraining = True
                break
            else:
                log.warning("Previous generation %s not found..." % (prev_generation))

    # fall back to a brand-new network when nothing could be loaded
    if nn is None:
        nn = man.create_new_network(conf.game, nn_model_config, generation_descr)

    nn.summary()

    self.nn = nn
    self.retraining = retraining
    log.info("Network %s, retraining: %s" % (self.nn, self.retraining))
def check_files_exist(self):
    """Ensure the model/weights/generation/samples directories exist.

    Creates any missing directory; calls critical_error when a path
    exists but is not a directory, or when creation fails.  Finally
    delegates to check_nn_files_exist().
    """
    # first check that the directories exist
    man = get_manager()
    for p in (man.model_path(self.conf.game),
              man.weights_path(self.conf.game),
              man.generation_path(self.conf.game),
              man.samples_path(self.conf.game, self.conf.generation_prefix)):

        if os.path.exists(p):
            if not os.path.isdir(p):
                # BUGFIX: the path was never interpolated into the message
                # (previously emitted a literal "%s")
                critical_error("Path exists and not directory: %s" % p)
        else:
            log.warning("Attempting to create path: %s" % p)
            os.makedirs(p)
            if not os.path.exists(p) or not os.path.isdir(p):
                critical_error("Failed to create directory: %s" % p)

    self.check_nn_files_exist()
def setup_c4(batch_size=1024):
    """Create a connectFour C++ supervisor and a hand-tuned self-play config.

    Returns (supervisor, conf).
    """
    game_name = "connectFour"
    manager = get_manager()

    # only multiple_policy_heads supported in c++
    descr = templates.default_generation_desc(
        game_name, multiple_policy_heads=True)
    network = manager.create_new_network(game_name, "smaller", descr)

    info = lookup.by_name(game_name)
    supervisor = cppinterface.Supervisor(info.get_sm(), network,
                                         batch_size=batch_size)

    # self-play configuration
    conf = confs.SelfPlayConfig()
    conf.max_number_of_samples = 4
    conf.resign_score_probability = 0.1
    conf.resign_false_positive_retry_percentage = 0.5
    conf.select_iterations = 0
    conf.sample_iterations = 50
    conf.score_iterations = 5

    # selection uses a temperature-driven policy config
    select_cfg = templates.puct_config_template("policy")
    select_cfg.temperature = 0.5
    select_cfg.depth_temperature_start = 1
    select_cfg.depth_temperature_increment = 0.5
    select_cfg.depth_temperature_stop = 40
    select_cfg.random_scale = 0.85
    select_cfg.choose = "choose_temperature"
    select_cfg.verbose = False
    conf.select_puct_config = select_cfg

    # sampling and scoring share the quiet "test" template
    conf.sample_puct_config = templates.puct_config_template("test")
    conf.sample_puct_config.verbose = False

    conf.score_puct_config = templates.puct_config_template("test")
    conf.score_puct_config.verbose = False

    return supervisor, conf
def go():
    """Build a trainer + network and hand gathered data to a Runner."""
    # NOTE(review): ITERATIONS is never used in this function — leftover?
    ITERATIONS = 3

    man = get_manager()

    # get data
    train_config = config()

    # get nn to test speed on
    transformer = man.get_transformer(train_config.game)
    trainer = train.TrainManager(train_config, transformer)

    nn_model_config = templates.nn_model_config_template(train_config.game, "small", transformer)
    generation_descr = templates.default_generation_desc(train_config.game)
    trainer.get_network(nn_model_config, generation_descr)

    # NOTE(review): gather_data() is called twice and the first result is
    # discarded — presumably the Runner should receive `data`; confirm
    # whether the second gather is intentional.
    data = trainer.gather_data()

    r = Runner(trainer.gather_data(), trainer.nn.get_model())
    r.warmup()
def reversi_8(self, filename="../data/elo/r8.elo"): man = manager.get_manager() from ggpzero.battle.reversi import MatchInfo8 match_info = MatchInfo8() def dp(g, playouts, v): return define_player("r8", g, playouts, v, dirichlet_noise_pct=0.15, depth_temperature_stop=6, depth_temperature_start=6, random_scale=0.75, max_dump_depth=1) random_player = get_player("r", MOVE_TIME) mcs_player = get_player("m", MOVE_TIME, max_iterations=800) simplemcts_player = get_player("s", MOVE_TIME, max_tree_playout_iterations=800) all_players = [random_player, mcs_player, simplemcts_player] gens = [] for name, num, incr in (["h3", 5, 20], ["h5", 10, 20], ['h6', 15, 20], ["kt1", 3, 5], ["kt2", 2, 5], ["f1", 2, 6], ["f2", 2, 6]): while True: gen = "%s_%s" % (name, num) if not man.can_load("reversi", gen): print "FAILED TO LOAD GEN", gen break gens.append(gen) num += incr all_players += [dp(g, 800, 3) for g in gens] gen_elo(match_info, all_players, filename)