class BoardTerm(object):
    ''' For nxn boards, identifies which terms are used and how to index into the base. '''

    # NOTE(review): presumably the position of the term within the base - confirm with the
    # code consuming this
    term_idx = attribute(3)

    # the terms themselves, for example: terms = ["white", "black", "arrow"]
    terms = attribute(attr_factory(list))
class AllRatings(object): game = at.attribute("game") # list of PlayerRating players = at.attribute(default=at.attr_factory(list)) # simple log of recent games log = at.attribute(default=at.attr_factory(list))
class ControlBase(object): ''' a control base is basically a mapping from a gdl base to a value ''' # list of argument terms - which must match exactly arg_terms = attribute(attr_factory(list)) # we set the channel to this value value = attribute(1)
class GameConfig(object):
    ''' Per game settings: game name, generation, sims multiplier and depth temperature
        limits. '''

    game_name = at.attribute("reversi")
    generation = at.attribute("genx")

    # 100 * n
    # NOTE(review): presumably the number of simulations is 100 * sims_multiplier - confirm
    # where this is consumed
    sims_multiplier = at.attribute(8)

    depth_temperature_max = at.attribute(1.5)
    depth_temperature_stop = at.attribute(16)
class GameDesc(object): game = attribute("checkers") # x_cords = "a b c d e f g h".split() x_cords = attribute(attr_factory(list)) # y_cords = "1 2 3 4 5 6 7 8".split() y_cords = attribute(attr_factory(list)) # list of BoardChannels (length kind of needs to be >= 1, or not much using convs) board_channels = attribute(attr_factory(list)) # list of list of ControlChannels control_channels = attribute(attr_factory(list))
class LGConfig(object):
    ''' Settings for playing on LG: identity, session cookie, dry run flag, games to play and
        where to store games. '''

    # LG convention is to postfix non humans with "_bot"
    whoami = at.attribute("gzero_bot")

    # fetch these from your browser, after logging in (the value here is only a placeholder
    # showing the expected format)
    cookie = at.attribute("login2=.......; JSESSIONID=.......")

    # dry run, don't actually send moves
    dry_run = at.attribute(True)

    # list of GameConfig
    play_games = at.attribute(default=at.attr_factory(list))

    # store game path
    # NOTE(review): this value is an absolute path inside one user's home directory, so it
    # will need overriding on any other machine
    store_path = at.attribute("/home/rxe/working/ggpzero/data/lg/")
class BoardChannels(object):
    ''' Board channels are defined by
          (a) the base term
          (b) a cross product of the board terms

        A channel's value is set if a matching base is set on the x/y coordinates. '''

    base_term = attribute("cell")

    # these are indices to the terms identifying the coordinates
    x_term_idx = attribute(1)
    y_term_idx = attribute(2)

    # list of BoardTerm (if any) - will result in taking a cross product if len() > 1
    board_terms = attribute(attr_factory(list))
class PUCTPlayerConfig(object):
    ''' Settings for a PUCT player: name, verbosity, playouts per iteration, generation and
        the evaluator config. '''

    name = attribute("Player")
    verbose = attribute(False)

    # XXX these should be renamed, and values less abused (0, -1 have special meaning)
    playouts_per_iteration = attribute(800)
    playouts_per_iteration_noop = attribute(1)

    generation = attribute("latest")

    # one of PUCTEvaluatorConfig/PUCTEvaluatorV2Config
    evaluator_config = attribute(default=attr_factory(PUCTEvaluatorV2Config))
class Symmetries(object): ''' defines the bases which symmetries can be applied ''' # list of ApplySymmetry apply_bases = attribute(attr_factory(list)) # list of terms skip_bases = attribute(attr_factory(list)) # list of ApplySymmetry apply_actions = attribute(attr_factory(list)) # list of terms skip_actions = attribute(attr_factory(list)) # do horizontal reflection do_reflection = attribute(False) # rotate x4 do_rotations_90 = attribute(False) # rotate x2 do_rotations_180 = attribute(False)
class Ok(object):
    ''' Declares a single attribute, message. '''

    message = attribute("ok")
class ControlChannel(object): ''' Creates a single channel. The control bases need to be mutually exclusive (ie only one set at a time). If none are set the value of the channel will be zero. If a channel is set, it is the value defined in the ControlBase ''' # a list of control bases. control_bases = attribute(attr_factory(list))
class WorkerConfigMsg(object):
    ''' Declares a single attribute, conf, holding a confs.WorkerConfig. '''

    # the default is produced by attr_factory(confs.WorkerConfig)
    conf = attribute(default=attr_factory(confs.WorkerConfig))
class PlayerRating(object):
    ''' Rating entry for one player: name, games played, elo and a fixed flag. '''

    # NOTE(review): the values below look like placeholder examples rather than meaningful
    # defaults - confirm that callers always set them
    name = at.attribute("xxyyyzz")
    played = at.attribute(42)
    elo = at.attribute(1302.124)

    # NOTE(review): presumably marks a rating that must not be updated - confirm
    fixed = at.attribute(False)
class RequestNetworkTrain(object):
    ''' Carries a game name together with a training config, a network model config and a
        generation description. '''

    game = attribute("game")

    # the defaults are produced by attr_factory() from the respective config classes
    train_conf = attribute(default=attr_factory(confs.TrainNNConfig))
    network_model = attribute(default=attr_factory(confs.NNModelConfig))
    generation_description = attribute(default=attr_factory(datadesc.GenerationDescription))
class ConfigureSelfPlay(object):
    ''' Carries a game name, a generation name and a self play config. '''

    game = attribute("game")
    generation_name = attribute("gen0")

    # the default is produced by attr_factory(confs.SelfPlayConfig)
    self_play_conf = attribute(default=attr_factory(confs.SelfPlayConfig))
class PUCTEvaluatorV2Config(object):
    ''' Settings for the V2 PUCT evaluator: puct constants, dirichlet noise, move choice,
        temperature, first play urgency, minimax backup, convergence and batching. '''

    verbose = attribute(False)

    puct_constant = attribute(0.75)
    puct_constant_root = attribute(2.5)

    # added to root child policy pct (alpha less than 0 is off)
    dirichlet_noise_pct = attribute(0.25)
    dirichlet_noise_alpha = attribute(-1)

    # looks up method() to use. one of (choose_top_visits | choose_temperature)
    choose = attribute("choose_top_visits")

    # debug, only if verbose is true
    max_dump_depth = attribute(2)

    random_scale = attribute(0.5)
    temperature = attribute(1.0)
    depth_temperature_start = attribute(5)
    depth_temperature_increment = attribute(0.5)
    depth_temperature_stop = attribute(10)
    depth_temperature_max = attribute(5.0)

    # popular leela-zero feature: First Play Urgency. When the policy space is large - this
    # might be necessary. If > 0, applies the prior of the parent, minus a discount, to
    # unvisited nodes. < 0 is off.
    fpu_prior_discount = attribute(-1)

    minimax_backup_ratio = attribute(0.75)
    minimax_threshold_visits = attribute(200)

    top_visits_best_guess_converge_ratio = attribute(0.8)

    think_time = attribute(10.0)
    converge_relaxed = attribute(5000)
    converge_non_relaxed = attribute(1000)

    # batches to GPU. number of greenlets to run, along with virtual losses
    batch_size = attribute(32)
class WorkerConfig(object):
    ''' Settings for a worker: where to connect, what work to do (training / self play) and
        how samples are polled and sent back to the server. '''

    connect_port = attribute(9000)
    connect_ip_addr = attribute("127.0.0.1")
    do_training = attribute(False)
    do_self_play = attribute(False)
    self_play_batch_size = attribute(1)

    # passed into Supervisor, used instead of a hard coded value.
    number_of_polls_before_dumping_stats = attribute(1024)

    # used to create SelfPlayManager.
    unique_identifier = attribute("pleasesetme")

    # number of threads to use during self play. if this is set to zero, will do inline (no
    # threads).
    num_workers = attribute(0)

    # slow things down (this is to prevent overheating the GPU) [only if inline, ie
    # num_workers == 0]
    sleep_between_poll = attribute(-1)

    # send back all the samples we have gathered after n seconds -
    # can also act like an application level keep alive
    server_poll_time = attribute(10)

    # the minimum number of samples gathered before sending to the server
    min_num_samples = attribute(128)

    # will exit if there is an update to the config
    exit_on_update_config = attribute(False)

    # don't replace the network every new generation, instead wait n generations
    # Note: leave this at 1. XXX Remove this? Not sure how useful it is.
    replace_network_every_n_gens = attribute(1)
class ApplySymmetry(object): base_term = attribute("cell") # these are index to the term identifying the coordinates x_terms_idx = attribute(attr_factory(list)) y_terms_idx = attribute(attr_factory(list))
class TrainNNConfig(object): game = attribute("breakthrough") # the generation prefix is what defines our models (along with step). Be careful not to # overwrite these. generation_prefix = attribute("x1_") # uses previous network? use_previous = attribute(True) next_step = attribute(42) overwrite_existing = attribute(False) validation_split = attribute(0.8) batch_size = attribute(32) epochs = attribute(10) # this is applied even if max_sample_count can't be reached starting_step = attribute(0) # one of adam / amsgrad/ SGD compile_strategy = attribute("SGD") learning_rate = attribute(0.01) l2_regularisation = attribute(0.0001) # list of tuple. This is the replay buffer. # [(5, 1.0), (10, 0.8)] # Will take the first 5 generations with all data and 80% of the next 10 generations. Every # generation after is ignored. # [(-1, 1.0)] # Will take all generations with 100% data. # XXX better name would be replay_sample_buckets resample_buckets = attribute(default=attr_factory(list)) # set the maximum size for an epoch. buckets will be scaled accordingly. max_epoch_size = attribute(-1) # set the initial weight before for the first epoch between training on the next epoch the # value weight will automatically adjust based on whether overfitting occurs initial_value_weight = attribute(1.0)
class TrainNNConfig(object): game = attribute("breakthrough") # the generation prefix is what defines our models (along with step). Be careful not to # overwrite these. generation_prefix = attribute("v2_") # uses previous network? use_previous = attribute(True) next_step = attribute(42) overwrite_existing = attribute(False) validation_split = attribute(0.8) batch_size = attribute(32) epochs = attribute(10) # this is applied even if max_sample_count can't be reached starting_step = attribute(0) # one of adam / amsgrad/ SGD compile_strategy = attribute("adam") learning_rate = attribute(None) # experimental: # list of tuple. Idea is that at epoch we take a percentage of the samples to train. # [(5, 1.0), (10, 0.8), (0, 0.5), (-5, 0.2)] # which translates into, take all samples of first 5, 80% of next 10, 50% of next n, and 20% of # the last 5. also assert number of gens is more than sum(abs(k) for k,_ in resample_buckets) resample_buckets = attribute(default=attr_factory(list)) # set the maximum size for an epoch. buckets will be scaled accordingly. max_epoch_size = attribute(-1) # set the initial weight before for the first epoch between training initial_value_weight = attribute(1.0)
class NNModelConfig(object):
    ''' Describes the network model: input dimensions, residual/cnn sizes, policy heads,
        regularisation, dropout and activation options. '''

    role_count = attribute(2)

    input_rows = attribute(8)
    input_columns = attribute(8)
    input_channels = attribute(8)

    residual_layers = attribute(8)
    cnn_filter_size = attribute(64)
    cnn_kernel_size = attribute(3)

    value_hidden_size = attribute(256)

    multiple_policies = attribute(False)

    # the size of policy distribution. The size of the list will be 1 if not
    # multiple_policies.
    policy_dist_count = attribute(default=attr_factory(list))

    l2_regularisation = attribute(False)

    # < 0 - no dropout
    dropout_rate_policy = attribute(0.333)
    dropout_rate_value = attribute(0.5)

    leaky_relu = attribute(False)
class SelfPlayConfig(object):
    ''' Settings for self play. A game goes through select, sample and score phases, each with
        its own puct config and iteration count, plus resign/abort/run-to-end controls. '''

    # -1 is off, and defaults to alpha-zero style
    max_number_of_samples = attribute(4)

    # temperature for policy
    temperature_for_policy = attribute(1.0)

    # percentage of games to play from beginning to end (using sample_xxx config)
    play_full_game_pct = attribute(-1)

    # select will get to the point where we start sampling
    select_puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    select_iterations = attribute(100)

    # sample is the actual sample we take to train for. The focus is on good policy
    # distribution.
    sample_puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    sample_iterations = attribute(800)

    # after samples, will play to the end using this config
    score_puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    score_iterations = attribute(100)

    # if the probability of losing drops below - then resign
    # and ignore resignation - and continue to end
    # two levels, resign0 should have more freedom than resign1
    # NOTE(review): presumably the retry percentage is the chance of ignoring a resignation
    # and playing on to the end - confirm in the self play code
    resign0_score_probability = attribute(0.9)
    resign0_false_positive_retry_percentage = attribute(0.5)
    resign1_score_probability = attribute(0.975)
    resign1_false_positive_retry_percentage = attribute(0.1)

    # aborts play if play depth exceeds this max_length (-1 off)
    abort_max_length = attribute(-1)

    # lookback to see if states are draw
    number_repeat_states_draw = attribute(-1)

    # score to back prop, to try and avoid repeat states
    repeat_states_score = attribute(0.45)

    # chance of really resigning. Will exit collecting.
    pct_actually_resign = attribute(0.4)

    # run to end (or scoring) - pct -> chance to actually run, score to exit on
    run_to_end_early_pct = attribute(0.2)
    run_to_end_early_score = attribute(0.01)
    run_to_end_minimum_game_depth = attribute(30)
class SelfPlayConfig(object):
    ''' Settings for self play: sampling, the puct config and evals per move, resignation and
        run-to-end controls. '''

    # Each full game played out will oscillate between using sample_iterations and
    # n < evals_per_move. So if set to 25%, will take 25% of samples, and 75% will be skipped
    # using n evals. This idea is adopted from KataGo and is NOT a full implementation of the
    # idea there. This is just the simplest way to introduce the concept without changing
    # much code. < 0, off.
    oscillate_sampling_pct = attribute(0.25)

    # temperature for policy (XXX remove this, I have never used it)
    temperature_for_policy = attribute(1.0)

    # sample is the actual sample we take to train for. The focus is on good policy
    # distribution.
    puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    evals_per_move = attribute(800)

    # resign
    # two levels, resign0 should have more freedom than resign1
    resign0_score_probability = attribute(0.9)
    resign0_pct = attribute(0.5)
    resign1_score_probability = attribute(0.975)
    resign1_pct = attribute(0.1)

    # run to end after resign - pct -> chance to actually run, score to exit on
    run_to_end_pct = attribute(0.2)
    run_to_end_evals = attribute(42)
    run_to_end_puct_config = attribute(
        default=attr_factory(PUCTEvaluatorConfig))
    run_to_end_early_score = attribute(0.01)
    run_to_end_minimum_game_depth = attribute(30)

    # aborts play if play depth exceeds this max_length (-1 off)
    abort_max_length = attribute(-1)
class PUCTEvaluatorConfig(object): verbose = attribute(False) # root level minmax ing, an old galvanise nn idea. Expands the root node, and presets visits. # -1 off. root_expansions_preset_visits = attribute(-1) # applies different constant until the following expansions are met puct_before_expansions = attribute(4) puct_before_root_expansions = attribute(4) # the puct constant. before expansions, and after expansions are met puct_constant_before = attribute(0.75) puct_constant_after = attribute(0.75) # added to root child policy pct (less than 0 is off) dirichlet_noise_pct = attribute(0.25) dirichlet_noise_alpha = attribute(0.1) # looks up method() to use. one of (choose_top_visits | choose_temperature) choose = attribute("choose_top_visits") # debug, only if verbose is true max_dump_depth = attribute(2) random_scale = attribute(0.5) temperature = attribute(1.0) depth_temperature_start = attribute(5) depth_temperature_increment = attribute(0.5) depth_temperature_stop = attribute(10) depth_temperature_max = attribute(5.0) # popular leela-zero feature: First Play Urgency. When the policy space is large - this might # be neccessary. If > 0, applies the prior of the parent, minus a discount to unvisited nodes # < 0 is off. fpu_prior_discount = attribute(-1)
class PUCTEvaluatorConfig(object):
    ''' Settings for the PUCT evaluator: puct constants, dirichlet noise and policy squashing,
        move choice, temperature, first play urgency, convergence, batching and tree
        options. '''

    verbose = attribute(False)

    puct_constant = attribute(0.85)
    puct_constant_root = attribute(2.5)

    # added to root child policy pct (< 0 is off)
    dirichlet_noise_pct = attribute(0.25)

    # policy squashing during noise will squash any probabilities in the policy over
    # noise_policy_squash_prob down to noise_policy_squash_prob. The pct is whether it will
    # activate or not while setting noise (< 0 is off). Hence, it is an option to prevent
    # overfitting self-play with a strong policy and to let dirichlet noise do its thing
    noise_policy_squash_pct = attribute(-1)
    noise_policy_squash_prob = attribute(0.05)

    # looks up method() to use. one of (choose_top_visits | choose_temperature)
    choose = attribute("choose_top_visits")

    # debug, only if verbose is true
    max_dump_depth = attribute(2)

    # all the temperature settings
    random_scale = attribute(0.5)
    temperature = attribute(1.0)
    depth_temperature_start = attribute(5)
    depth_temperature_increment = attribute(0.5)
    depth_temperature_stop = attribute(10)
    depth_temperature_max = attribute(5.0)

    # popular leela-zero feature: First Play Urgency. When the policy space is large - this
    # might be necessary. If > 0, applies the prior of the parent, minus a discount, to
    # unvisited nodes. < 0 is off.
    fpu_prior_discount = attribute(0.25)
    fpu_prior_discount_root = attribute(0.25)

    # main control for real matches
    think_time = attribute(10.0)

    # converge options. converge basically means: top_visits == top_score in root.

    # says whether we have visited more than enough for convergence to have occurred
    converged_visits = attribute(5000)

    # if we need to bail and have not converged, will allow some relaxation at choice time
    top_visits_best_guess_converge_ratio = attribute(0.8)

    # if using think_time, will multiply the think_time until converged
    # if using evals_per_move, then will multiply until converged. Very useful for self-play
    # and low evals.
    # XXX evals_per_move is called playouts_per_iteration below, need to fix this remnant. In
    # SelfPlayConfig it is correctly named.
    evaluation_multiplier_to_convergence = attribute(1.0)

    # batches to GPU. number of greenlets to run, along with virtual losses
    batch_size = attribute(32)

    # for repetition
    use_legals_count_draw = attribute(-1)

    # turns on the MCTS prover during back propagation of scores.
    backup_finalised = attribute(False)

    # allow transpositions in the game tree. Not wise to use this in self-play.
    lookup_transpositions = attribute(False)
class RequestSamples(object):
    ''' Declares a single attribute, new_states. '''

    # list of states (0/1 tuples) - to reduce duplicates
    new_states = attribute(default=attr_factory(list))
class WorkerConfig(object):
    ''' Settings for a worker: where to connect, what work to do (training / self play) and
        how samples are polled and sent back to the server. '''

    connect_port = attribute(9000)
    connect_ip_addr = attribute("127.0.0.1")
    do_training = attribute(False)
    do_self_play = attribute(False)
    self_play_batch_size = attribute(1)

    # passed into Supervisor, used instead of a hard coded value.
    number_of_polls_before_dumping_stats = attribute(1024)

    # used to create SelfPlayManager
    unique_identifier = attribute("pleasesetme")

    # slow things down
    sleep_between_poll = attribute(-1)

    # send back whatever samples we have gathered at this interval - sort of an application
    # level keep alive
    server_poll_time = attribute(10)

    # the minimum number of samples gathered before sending to the server
    min_num_samples = attribute(128)

    # if this is set to zero, will do inline
    num_workers = attribute(0)

    # system commands to run to get the neural network if it isn't in data
    run_cmds_if_no_nn = attribute(default=attr_factory(list))

    # will exit if there is an update to the config
    exit_on_update_config = attribute(False)

    # don't replace the network every new network, instead wait n generations
    replace_network_every_n_gens = attribute(1)
class RequestSampleResponse(object):
    ''' Carries a list of samples and a count of duplicates seen. '''

    # list of def.confs.Sample
    samples = attribute(default=attr_factory(list))
    duplicates_seen = attribute(0)
class ServerConfig(object):
    ''' Settings for the server: game and generation, port, sample thresholds, the base
        configs it starts from, checkpointing and network resets. '''

    game = attribute("breakthrough")
    generation_prefix = attribute("v42")

    port = attribute(9000)

    current_step = attribute(0)

    # number of samples to acquire before starting to train
    num_samples_to_train = attribute(1024)

    # maximum growth while training
    max_samples_growth = attribute(0.2)

    # the starting generation description
    base_generation_description = attribute(default=attr_factory(GenerationDescription))

    # the base network model
    base_network_model = attribute(default=attr_factory(NNModelConfig))

    # the starting training config
    base_training_config = attribute(default=attr_factory(TrainNNConfig))

    # the self play config
    self_play_config = attribute(default=attr_factory(SelfPlayConfig))

    # save the samples every n seconds (60.0 * 5 is five minutes)
    checkpoint_interval = attribute(60.0 * 5)

    # this forces the network to be reset to random weights, every n generations
    reset_network_every_n_generations = attribute(-1)
class NNModelConfig(object):
    ''' Describes the network model: input dimensions, residual/cnn sizes, policy heads,
        dropout and architecture options. '''

    role_count = attribute(2)

    input_rows = attribute(8)
    input_columns = attribute(8)
    input_channels = attribute(8)

    residual_layers = attribute(8)
    cnn_filter_size = attribute(64)
    cnn_kernel_size = attribute(3)

    value_hidden_size = attribute(256)

    # the size of policy distribution.
    policy_dist_count = attribute(default=attr_factory(list))

    # < 0 - no dropout
    dropout_rate_policy = attribute(0.333)
    dropout_rate_value = attribute(0.5)

    leaky_relu = attribute(False)

    # architecture switches
    squeeze_excite_layers = attribute(False)
    resnet_v2 = attribute(False)
    global_pooling_value = attribute(False)
    concat_all_layers = attribute(False)