class BoardTerm(object):
    ''' For nxn boards, we identify which terms we use and index into the base. '''

    term_idx = attribute(3)

    # terms = ["white", "black", "arrow"]
    terms = attribute(attr_factory(list))
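
# All of the classes in this listing follow the same declarative pattern: each class-level
# attribute(...) call names a config field and supplies its default, and attr_factory(list)
# marks a mutable default that should be built fresh per instance.  A minimal usage sketch,
# assuming the classes can be constructed with keyword arguments (attrs-style) - the project's
# actual construction/serialisation machinery is not shown here:

board_term = BoardTerm(term_idx=3, terms="white black arrow".split())
assert board_term.terms == ["white", "black", "arrow"]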
Example #2
class AllRatings(object):
    game = at.attribute("game")

    # list of PlayerRating
    players = at.attribute(default=at.attr_factory(list))

    # simple log of recent games
    log = at.attribute(default=at.attr_factory(list))
class ControlBase(object):
    ''' a control base is basically a mapping from a gdl base to a value '''

    # list of argument terms - which must match exactly
    arg_terms = attribute(attr_factory(list))

    # we set the channel to this value
    value = attribute(1)
Example #4
class GameConfig(object):
    game_name = at.attribute("reversi")
    generation = at.attribute("genx")

    # 100 * n
    sims_multiplier = at.attribute(8)

    depth_temperature_max = at.attribute(1.5)
    depth_temperature_stop = at.attribute(16)
class GameDesc(object):
    game = attribute("checkers")

    # x_cords = "a b c d e f g h".split()
    x_cords = attribute(attr_factory(list))

    # y_cords = "1 2 3 4 5 6 7 8".split()
    y_cords = attribute(attr_factory(list))

    # list of BoardChannels (length needs to be >= 1, otherwise there is not much point in using convolutions)
    board_channels = attribute(attr_factory(list))

    # list of list of ControlChannels
    control_channels = attribute(attr_factory(list))
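
# A likely use of x_cords/y_cords is mapping GDL coordinate terms onto array indices when
# filling the input planes.  A minimal sketch under that assumption; cord_to_index is a
# hypothetical helper, not part of the project:

def cord_to_index(desc, x_cord, y_cord):
    ''' map coordinate terms such as ("c", "4") to (column, row) indices '''
    return desc.x_cords.index(x_cord), desc.y_cords.index(y_cord)

# e.g. with x_cords = "a b c d e f g h".split() and y_cords = "1 2 3 4 5 6 7 8".split(),
# cord_to_index(desc, "c", "4") -> (2, 3)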
Example #6
class LGConfig(object):
    # LG convention is to postfix non humans with "_bot"
    whoami = at.attribute("gzero_bot")

    # fetch these from your browser after logging in
    cookie = at.attribute("login2=.......; JSESSIONID=.......")

    # dry run, don't actually send moves
    dry_run = at.attribute(True)

    # list of GameConfig
    play_games = at.attribute(default=at.attr_factory(list))

    # store game path
    store_path = at.attribute("/home/rxe/working/ggpzero/data/lg/")
class BoardChannels(object):
    ''' board channels are defined by
        (a) the base term
        (b) a cross product of the board terms

    The value set on the channel itself depends on whether a matching base is set at the x/y coordinates.
    '''

    base_term = attribute("cell")

    # these are indices to the terms identifying the coordinates
    x_term_idx = attribute(1)
    y_term_idx = attribute(2)

    # list of BoardTerm (if any) - will result in taking a cross product if len() > 1
    board_terms = attribute(attr_factory(list))
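
# A simplified sketch of the docstring above: a true base whose first term equals base_term
# places a value at the x/y position given by x_term_idx / y_term_idx.  fill_plane is
# hypothetical - it ignores the board_terms cross product, uses 1 as the value and assumes
# plane[row][column] ordering, all of which are assumptions rather than the project's code.

def fill_plane(plane, base, channels, x_cords, y_cords):
    ''' base is a tuple of terms, e.g. ("cell", "c", "4", "white") '''
    if base[0] != channels.base_term:
        return
    x = x_cords.index(base[channels.x_term_idx])
    y = y_cords.index(base[channels.y_term_idx])
    plane[y][x] = 1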
Example #8
class PUCTPlayerConfig(object):
    name = attribute("Player")

    verbose = attribute(False)

    # XXX these should be renamed, and values less abused (0, -1 have special meaning)
    playouts_per_iteration = attribute(800)
    playouts_per_iteration_noop = attribute(1)

    generation = attribute("latest")

    # one of PUCTEvaluatorConfig/PUCTEvaluatorV2Config
    evaluator_config = attribute(default=attr_factory(PUCTEvaluatorV2Config))
class Symmetries(object):
    ''' defines the bases to which symmetries can be applied '''

    # list of ApplySymmetry
    apply_bases = attribute(attr_factory(list))

    # list of terms
    skip_bases = attribute(attr_factory(list))

    # list of ApplySymmetry
    apply_actions = attribute(attr_factory(list))

    # list of terms
    skip_actions = attribute(attr_factory(list))

    # do horizontal reflection
    do_reflection = attribute(False)

    # rotate x4
    do_rotations_90 = attribute(False)

    # rotate x2
    do_rotations_180 = attribute(False)
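
# For reference, the usual index transforms these flags correspond to on an n x n board
# (the exact coordinate convention and rotation direction used by the project are assumptions):

def reflect_horizontal(x, y, n):
    return n - 1 - x, y

def rotate_90(x, y, n):
    return n - 1 - y, x

def rotate_180(x, y, n):
    return n - 1 - x, n - 1 - y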
Example #10
class Ok(object):
    message = attribute("ok")
Example #11
class ControlChannel(object):
    ''' Creates a single channel.  The control bases need to be mutually exclusive (ie only one set
        at a time).  If none are set, the value of the channel will be zero.  If one is set, the
        channel takes the value defined in that ControlBase '''
    # a list of control bases.
    control_bases = attribute(attr_factory(list))
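
# A sketch of the semantics in the docstring: since the control bases are mutually exclusive,
# the whole channel takes the value of whichever ControlBase matches the current state, or zero
# if none match.  channel_value and the exact matching of arg_terms against the state are
# hypothetical.

def channel_value(control_bases, true_bases):
    ''' true_bases: set of argument-term tuples currently set in the game state '''
    for cb in control_bases:
        if tuple(cb.arg_terms) in true_bases:
            return cb.value
    return 0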
Example #12
class WorkerConfigMsg(object):
    conf = attribute(default=attr_factory(confs.WorkerConfig))
Example #13
class PlayerRating(object):
    name = at.attribute("xxyyyzz")
    played = at.attribute(42)
    elo = at.attribute(1302.124)
    fixed = at.attribute(False)
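
# For reference, the standard Elo expected-score formula such a rating would normally feed
# into (the project's actual update rule and K-factor are not shown here; presumably 'fixed'
# anchors a rating so it is never updated):

def elo_expected_score(rating_a, rating_b):
    return 1.0 / (1.0 + 10 ** ((rating_b - rating_a) / 400.0))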
Example #14
class RequestNetworkTrain(object):
    game = attribute("game")
    train_conf = attribute(default=attr_factory(confs.TrainNNConfig))
    network_model = attribute(default=attr_factory(confs.NNModelConfig))
    generation_description = attribute(default=attr_factory(datadesc.GenerationDescription))
Example #15
class ConfigureSelfPlay(object):
    game = attribute("game")
    generation_name = attribute("gen0")
    self_play_conf = attribute(default=attr_factory(confs.SelfPlayConfig))
Example #16
class PUCTEvaluatorV2Config(object):
    verbose = attribute(False)

    puct_constant = attribute(0.75)
    puct_constant_root = attribute(2.5)

    # added to root child policy pct (alpha less than 0 is off)
    dirichlet_noise_pct = attribute(0.25)
    dirichlet_noise_alpha = attribute(-1)

    # looks up method() to use.  one of (choose_top_visits | choose_temperature)
    choose = attribute("choose_top_visits")

    # debug, only if verbose is true
    max_dump_depth = attribute(2)

    random_scale = attribute(0.5)
    temperature = attribute(1.0)
    depth_temperature_start = attribute(5)
    depth_temperature_increment = attribute(0.5)
    depth_temperature_stop = attribute(10)
    depth_temperature_max = attribute(5.0)

    # popular leela-zero feature: First Play Urgency.  When the policy space is large - this might
    # be necessary.  If > 0, applies the prior of the parent, minus a discount, to unvisited nodes
    # < 0 is off.
    fpu_prior_discount = attribute(-1)

    minimax_backup_ratio = attribute(0.75)
    minimax_threshold_visits = attribute(200)

    top_visits_best_guess_converge_ratio = attribute(0.8)

    think_time = attribute(10.0)
    converge_relaxed = attribute(5000)
    converge_non_relaxed = attribute(1000)

    # batches to GPU.  number of greenlets to run, along with virtual losses
    batch_size = attribute(32)
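
# For orientation, the standard AlphaZero-style formulas that dirichlet_noise_pct/alpha and the
# puct constants plug into are sketched below.  This is a simplification - the evaluator's real
# code also handles FPU, virtual losses, batching to the GPU, etc.
import math
import random

def add_dirichlet_noise(policy, pct, alpha):
    ''' mix dirichlet noise into the root policy; alpha < 0 leaves the policy untouched '''
    if alpha < 0:
        return policy
    noise = [random.gammavariate(alpha, 1.0) for _ in policy]
    total = sum(noise)
    noise = [x / total for x in noise]
    return [(1 - pct) * p + pct * x for p, x in zip(policy, noise)]

def puct_score(child_q, child_prior, child_visits, parent_visits, puct_constant):
    ''' exploitation + exploration score used to pick which child to descend '''
    u = puct_constant * child_prior * math.sqrt(parent_visits) / (1.0 + child_visits)
    return child_q + u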
Example #17
class WorkerConfig(object):
    connect_port = attribute(9000)
    connect_ip_addr = attribute("127.0.0.1")
    do_training = attribute(False)
    do_self_play = attribute(False)
    self_play_batch_size = attribute(1)

    # passed into Supervisor, used instead of hard coded value.
    number_of_polls_before_dumping_stats = attribute(1024)

    # used to create SelfPlayManager.
    unique_identifier = attribute("pleasesetme")

    # number of threads to use during self play.  if this is set to zero, will do inline (no threads).
    num_workers = attribute(0)

    # slow things down (to prevent the GPU from overheating) [only if inline, ie num_workers == 0]
    sleep_between_poll = attribute(-1)

    # send back all the samples we have gathered after n seconds -
    # can also act like an application level keep alive
    server_poll_time = attribute(10)

    # the minimum number of samples gathered before sending to the server
    min_num_samples = attribute(128)

    # will exit if there is an update to the config
    exit_on_update_config = attribute(False)

    # don't replace the network every new generation, instead wait n generations
    # Note: leave this at 1.  XXX Remove this?  Not sure how useful it is.
    replace_network_every_n_gens = attribute(1)
Example #18
class ApplySymmetry(object):
    base_term = attribute("cell")

    # these are indices to the terms identifying the coordinates
    x_terms_idx = attribute(attr_factory(list))
    y_terms_idx = attribute(attr_factory(list))
Example #19
class TrainNNConfig(object):
    game = attribute("breakthrough")

    # the generation prefix is what defines our models (along with step). Be careful not to
    # overwrite these.
    generation_prefix = attribute("x1_")

    # uses previous network?
    use_previous = attribute(True)
    next_step = attribute(42)
    overwrite_existing = attribute(False)
    validation_split = attribute(0.8)
    batch_size = attribute(32)
    epochs = attribute(10)

    # this is applied even if max_sample_count can't be reached
    starting_step = attribute(0)

    # one of adam / amsgrad / SGD
    compile_strategy = attribute("SGD")
    learning_rate = attribute(0.01)
    l2_regularisation = attribute(0.0001)

    # list of tuple.  This is the replay buffer.  (a worked sketch of how the buckets read follows this class)

    # [(5, 1.0), (10, 0.8)]
    # Will take the first 5 generations with all data and 80% of the next 10 generations.  Every
    # generation after is ignored.

    # [(-1, 1.0)]
    # Will take all generations with 100% data.

    # XXX better name would be replay_sample_buckets
    resample_buckets = attribute(default=attr_factory(list))

    # set the maximum size for an epoch.  buckets will be scaled accordingly.
    max_epoch_size = attribute(-1)

    # set the initial value weight for the first epoch of training; on subsequent epochs the
    # value weight will automatically adjust based on whether overfitting occurs
    initial_value_weight = attribute(1.0)
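
# A worked sketch of how the resample_buckets specification reads, following the comment above.
# bucket_percent is hypothetical; the trainer's own bucketing code is not shown here.

def bucket_percent(resample_buckets, generations_back):
    ''' percentage of samples kept for a generation that is generations_back behind the most
        recent one.  (count, pct) buckets are consumed in order; count == -1 means "all
        remaining generations".  Returns 0.0 once the buckets are exhausted. '''
    for count, pct in resample_buckets:
        if count == -1 or generations_back < count:
            return pct
        generations_back -= count
    return 0.0

# e.g. with [(5, 1.0), (10, 0.8)]: generations 0-4 -> 1.0, generations 5-14 -> 0.8, older -> 0.0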
Example #20
class TrainNNConfig(object):
    game = attribute("breakthrough")

    # the generation prefix is what defines our models (along with step). Be careful not to
    # overwrite these.
    generation_prefix = attribute("v2_")

    # uses previous network?
    use_previous = attribute(True)
    next_step = attribute(42)
    overwrite_existing = attribute(False)
    validation_split = attribute(0.8)
    batch_size = attribute(32)
    epochs = attribute(10)

    # this is applied even if max_sample_count can't be reached
    starting_step = attribute(0)

    # one of adam / amsgrad / SGD
    compile_strategy = attribute("adam")
    learning_rate = attribute(None)

    # experimental:
    # list of tuple.  The idea is that we take a percentage of each generation's samples to train on.
    # [(5, 1.0), (10, 0.8), (0, 0.5), (-5, 0.2)]
    # which translates into: take all samples of the first 5 generations, 80% of the next 10, 50% of
    # the next n, and 20% of the last 5.  Also assert the number of generations is more than
    # sum(abs(k) for k, _ in resample_buckets)
    resample_buckets = attribute(default=attr_factory(list))

    # set the maximum size for an epoch.  buckets will be scaled accordingly.
    max_epoch_size = attribute(-1)

    # set the initial value weight for the first epoch of training
    initial_value_weight = attribute(1.0)
Example #21
class NNModelConfig(object):
    role_count = attribute(2)

    input_rows = attribute(8)
    input_columns = attribute(8)
    input_channels = attribute(8)

    residual_layers = attribute(8)
    cnn_filter_size = attribute(64)
    cnn_kernel_size = attribute(3)

    value_hidden_size = attribute(256)

    multiple_policies = attribute(False)

    # the sizes of the policy distributions.  The length of the list will be 1 if not multiple_policies.
    policy_dist_count = attribute(default=attr_factory(list))

    l2_regularisation = attribute(False)

    # < 0 - no dropout
    dropout_rate_policy = attribute(0.333)
    dropout_rate_value = attribute(0.5)

    leaky_relu = attribute(False)
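
# For orientation, the tensor shapes these settings imply.  Channels-first layout and a
# per-role value output are assumptions here; the actual model builder may differ.

def model_shapes(conf):
    input_shape = (conf.input_channels, conf.input_rows, conf.input_columns)
    # one policy head per entry in policy_dist_count (a single entry if not multiple_policies)
    policy_shapes = [(count,) for count in conf.policy_dist_count]
    value_shape = (conf.role_count,)
    return input_shape, policy_shapes, value_shape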
Example #22
class SelfPlayConfig(object):
    # -1 is off, and defaults to alpha-zero style
    max_number_of_samples = attribute(4)

    # temperature for policy
    temperature_for_policy = attribute(1.0)

    # percentage of games to play from beginning to end (using sample_xxx config)
    play_full_game_pct = attribute(-1)

    # select will get to the point where we start sampling
    select_puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    select_iterations = attribute(100)

    # sample is the actual sample we take to train for.  The focus is on good policy distribution.
    sample_puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    sample_iterations = attribute(800)

    # after samples, will play to the end using this config
    score_puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    score_iterations = attribute(100)

    # resign when the score probability indicates the game is decided (resignX_score_probability);
    # with the false positive retry percentage, ignore the resignation and continue to the end
    # two levels, resign0 should have more freedom than resign1
    resign0_score_probability = attribute(0.9)
    resign0_false_positive_retry_percentage = attribute(0.5)
    resign1_score_probability = attribute(0.975)
    resign1_false_positive_retry_percentage = attribute(0.1)

    # aborts play if play depth exceeds this max_length (-1 off)
    abort_max_length = attribute(-1)

    # look back to see if repeated states should be scored as a draw
    number_repeat_states_draw = attribute(-1)

    # score to back-propagate, to try to avoid repeat states
    repeat_states_score = attribute(0.45)

    # chance of really resigning.  Will exit collecting.
    pct_actually_resign = attribute(0.4)

    # run to end (or scoring) - pct -> chance to actually run, score to exit on
    run_to_end_early_pct = attribute(0.2)
    run_to_end_early_score = attribute(0.01)
    run_to_end_minimum_game_depth = attribute(30)
Example #23
class SelfPlayConfig(object):
    # Each full game played out will oscillate between using evals_per_move and a smaller n <
    # evals_per_move evals.  So if set to 25%, 25% of moves will be taken as samples and the other
    # 75% will be skipped using n evals.  This idea is adopted from KataGo and is NOT a full
    # implementation of the idea there.  It is just the simplest way to introduce the concept
    # without changing much code.  < 0 is off.  (see the sketch after this class)
    oscillate_sampling_pct = attribute(0.25)

    # temperature for policy (XXX remove this, I have never used it)
    temperature_for_policy = attribute(1.0)

    # sample is the actual sample we take to train for.  The focus is on good policy distribution.
    puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    evals_per_move = attribute(800)

    # resign
    # two levels, resign0 should have more freedom than resign1
    resign0_score_probability = attribute(0.9)
    resign0_pct = attribute(0.5)

    resign1_score_probability = attribute(0.975)
    resign1_pct = attribute(0.1)

    # run to end after resign - pct -> chance to actually run, score to exit on
    run_to_end_pct = attribute(0.2)
    run_to_end_evals = attribute(42)
    run_to_end_puct_config = attribute(
        default=attr_factory(PUCTEvaluatorConfig))
    run_to_end_early_score = attribute(0.01)
    run_to_end_minimum_game_depth = attribute(30)

    # aborts play if play depth exceeds this max_length (-1 off)
    abort_max_length = attribute(-1)
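
# A sketch of the oscillation described at the top of this class: per move, decide whether the
# position becomes a training sample (full evals_per_move) or is played quickly with a smaller
# eval count.  choose_eval_count and quick_evals are hypothetical; the real self-play loop also
# handles resignation, run-to-end, aborting, etc.
import random

def choose_eval_count(conf, quick_evals=100):
    if conf.oscillate_sampling_pct < 0 or random.random() < conf.oscillate_sampling_pct:
        return conf.evals_per_move, True      # sampled move: full search
    return quick_evals, False                 # skipped move: cheap search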
Example #24
class PUCTEvaluatorConfig(object):
    verbose = attribute(False)

    # root level minimaxing, an old galvanise nn idea.  Expands the root node, and presets visits.
    # -1 off.
    root_expansions_preset_visits = attribute(-1)

    # applies different constant until the following expansions are met
    puct_before_expansions = attribute(4)
    puct_before_root_expansions = attribute(4)

    # the puct constant.  before expansions, and after expansions are met
    puct_constant_before = attribute(0.75)
    puct_constant_after = attribute(0.75)

    # added to root child policy pct (less than 0 is off)
    dirichlet_noise_pct = attribute(0.25)
    dirichlet_noise_alpha = attribute(0.1)

    # looks up method() to use.  one of (choose_top_visits | choose_temperature)
    choose = attribute("choose_top_visits")

    # debug, only if verbose is true
    max_dump_depth = attribute(2)

    random_scale = attribute(0.5)
    temperature = attribute(1.0)
    depth_temperature_start = attribute(5)
    depth_temperature_increment = attribute(0.5)
    depth_temperature_stop = attribute(10)
    depth_temperature_max = attribute(5.0)

    # popular leela-zero feature: First Play Urgency.  When the policy space is large - this might
    # be necessary.  If > 0, applies the prior of the parent, minus a discount, to unvisited nodes
    # < 0 is off.
    fpu_prior_discount = attribute(-1)
Example #25
class PUCTEvaluatorConfig(object):
    verbose = attribute(False)

    puct_constant = attribute(0.85)
    puct_constant_root = attribute(2.5)

    # added to root child policy pct (< 0 is off)
    dirichlet_noise_pct = attribute(0.25)

    # policy squashing during noise will clip any probability in the policy that is over
    # noise_policy_squash_prob down to noise_policy_squash_prob.  The pct is the chance that it
    # activates when setting the noise (< 0 is off).  It is an option to prevent overfitting self-play
    # with a strong policy and to let dirichlet noise do its thing (see the sketch after this class)
    noise_policy_squash_pct = attribute(-1)
    noise_policy_squash_prob = attribute(0.05)

    # looks up method() to use.  one of (choose_top_visits | choose_temperature)
    choose = attribute("choose_top_visits")

    # debug, only if verbose is true
    max_dump_depth = attribute(2)

    # all the temperature settings
    random_scale = attribute(0.5)
    temperature = attribute(1.0)
    depth_temperature_start = attribute(5)
    depth_temperature_increment = attribute(0.5)
    depth_temperature_stop = attribute(10)
    depth_temperature_max = attribute(5.0)

    # popular leela-zero feature: First Play Urgency.  When the policy space is large - this might
    # be necessary.  If > 0, applies the prior of the parent, minus a discount, to unvisited nodes
    # < 0 is off.
    fpu_prior_discount = attribute(0.25)
    fpu_prior_discount_root = attribute(0.25)

    # main control for real matches
    think_time = attribute(10.0)

    # converge options.  converge basically means: top_visits == top_score in root.

    # says if we have visited more than enough so that convergence has occurred
    converged_visits = attribute(5000)

    # if we need to bail and not converged, will allow some relaxation at choice time
    top_visits_best_guess_converge_ratio = attribute(0.8)

    # if using think_time, will multiply the think_time until converged
    # if using evals_per_move, then will multiply until converged.  Very useful for self-play and low evals.

    # XXX evals_per_move is called playouts_per_iteration below, need to fix this remnant.  In
    # SelfPlayConfig it is correctly named.
    evaluation_multiplier_to_convergence = attribute(1.0)

    # batches to GPU.  number of greenlets to run, along with virtual losses
    batch_size = attribute(32)

    # for repetition
    use_legals_count_draw = attribute(-1)

    # turns on the MCTS prover during back propagation of scores.
    backup_finalised = attribute(False)

    # allow transpositions in the game tree.  Not wise to use this in self-play.
    lookup_transpositions = attribute(False)
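
# A sketch of the policy squashing described above: with probability noise_policy_squash_pct
# (< 0 disables it), any prior above noise_policy_squash_prob is clipped down to that value
# before dirichlet noise is applied, so a very confident policy cannot drown the noise out.
# squash_policy is hypothetical and the renormalisation step is an assumption.
import random

def squash_policy(policy, conf):
    if conf.noise_policy_squash_pct < 0 or random.random() >= conf.noise_policy_squash_pct:
        return policy
    squashed = [min(p, conf.noise_policy_squash_prob) for p in policy]
    total = sum(squashed)
    return [p / total for p in squashed] if total > 0 else policy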
Example #26
class RequestSamples(object):
    # list of states (0/1 tuples) - to reduce duplicates
    new_states = attribute(default=attr_factory(list))
Example #27
class WorkerConfig(object):
    connect_port = attribute(9000)
    connect_ip_addr = attribute("127.0.0.1")
    do_training = attribute(False)
    do_self_play = attribute(False)
    self_play_batch_size = attribute(1)

    # passed into Supervisor, used instead of hard coded value.
    number_of_polls_before_dumping_stats = attribute(1024)

    # used to create SelfPlayManager
    unique_identifier = attribute("pleasesetme")

    # slow things down
    sleep_between_poll = attribute(-1)

    # send back whatever samples we have gathered at this point - sort of an application level keep alive
    server_poll_time = attribute(10)

    # the minimum number of samples gathered before sending to the server
    min_num_samples = attribute(128)

    # if this is set to zero, will do inline
    num_workers = attribute(0)

    # run system commands to get the neural network if it isn't in data
    run_cmds_if_no_nn = attribute(default=attr_factory(list))

    # will exit if there is an update to the config
    exit_on_update_config = attribute(False)

    # don't replace the network every new generation, instead wait n generations
    replace_network_every_n_gens = attribute(1)
Example #28
class RequestSampleResponse(object):
    # list of def.confs.Sample
    samples = attribute(default=attr_factory(list))
    duplicates_seen = attribute(0)
Example #29
class ServerConfig(object):
    game = attribute("breakthrough")
    generation_prefix = attribute("v42")

    port = attribute(9000)

    current_step = attribute(0)

    # number of samples to acquire before starting to train
    num_samples_to_train = attribute(1024)

    # maximum growth while training
    max_samples_growth = attribute(0.2)

    # the starting generation description
    base_generation_description = attribute(default=attr_factory(GenerationDescription))

    # the base network model
    base_network_model = attribute(default=attr_factory(NNModelConfig))

    # the starting training config
    base_training_config = attribute(default=attr_factory(TrainNNConfig))

    # the self play config
    self_play_config = attribute(default=attr_factory(SelfPlayConfig))

    # save the samples every n seconds
    checkpoint_interval = attribute(60.0 * 5)

    # this forces the network to be reset to random weights, every n generations
    reset_network_every_n_generations = attribute(-1)
Example #30
class NNModelConfig(object):
    role_count = attribute(2)

    input_rows = attribute(8)
    input_columns = attribute(8)
    input_channels = attribute(8)

    residual_layers = attribute(8)
    cnn_filter_size = attribute(64)
    cnn_kernel_size = attribute(3)

    value_hidden_size = attribute(256)

    # the size of policy distribution.
    policy_dist_count = attribute(default=attr_factory(list))

    # < 0 - no dropout
    dropout_rate_policy = attribute(0.333)
    dropout_rate_value = attribute(0.5)

    leaky_relu = attribute(False)
    squeeze_excite_layers = attribute(False)
    resnet_v2 = attribute(False)
    global_pooling_value = attribute(False)
    concat_all_layers = attribute(False)