def block(tensor):
    if simple:
        x = conv(tensor, "conv")
        x = bn_(x, "bn")

        if dropout:
            x = klayers.Dropout(dropout)(x)

    else:
        x = conv(tensor, "conv0")
        x = bn_(x, "bn0")
        x = act_(x, "act0")

        x = conv(x, "conv1", kernel_size=1)
        x = bn_(x, "bn1")
        x = act_(x, "act1")

        if dropout:
            x = klayers.Dropout(dropout)(x)

        x = conv(x, "conv2")
        x = bn_(x, "bn2")

    x = klayers.add([tensor, x], name=prefix + "add")
    return act_(x, "act_final")
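# The block() above closes over helpers (conv, bn_, act_) and settings
# (simple, dropout, prefix) that are not shown in this listing.  Below is a
# minimal sketch of what the enclosing factory could look like; the helper
# names mirror the calls above, but the exact signatures are assumptions for
# illustration, not this project's actual API.
from keras import layers as klayers  # assumed module-level import

def residual_block_v1_sketch(filters, kernel_size, prefix="",
                             dropout=None, activation="relu", simple=False):
    def conv(tensor, name, kernel_size=kernel_size):
        # no bias: batch normalisation immediately follows each convolution
        return klayers.Conv2D(filters, kernel_size, padding="same",
                              use_bias=False, name=prefix + name)(tensor)

    def bn_(tensor, name):
        return klayers.BatchNormalization(name=prefix + name)(tensor)

    def act_(tensor, name):
        # 'leakyrelu' is not a stock Keras activation string, so special-case it
        if activation == "leakyrelu":
            return klayers.LeakyReLU(name=prefix + name)(tensor)
        return klayers.Activation(activation, name=prefix + name)(tensor)

    def block(tensor):
        # post-activation ordering, ending in the residual add (as above)
        x = bn_(conv(tensor, "conv0"), "bn0")
        x = act_(x, "act0")
        x = bn_(conv(x, "conv1"), "bn1")
        x = klayers.add([tensor, x], name=prefix + "add")
        return act_(x, "act_final")

    return block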
def block(tensor):
    x = tensor
    for i in range(num_convs - 1):
        x = bn_(x, i + 1)
        x = act_(x, i + 1)
        x = conv(x, i + 1)

    x = bn_(x, num_convs)
    x = act_(x, num_convs)

    if dropout > 0:
        x = klayers.Dropout(dropout)(x)

    x = conv(x, num_convs)

    x = klayers.add([tensor, x], name=prefix + "add")
    return x
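# A quick shape check for the pre-activation block above.  This assumes a
# channels-last layout, that the enclosing factory is named residual_block_v2
# with the (filters, kernel_size, num_convs, ...) signature used by
# get_network_model() further down, and that keras_models is keras.models;
# the concrete values are made up for the example.
from keras import models as keras_models  # assumed module-level import

def _demo_v2_block_shapes():
    inp = klayers.Input(shape=(8, 8, 64))
    out = residual_block_v2(64, 3, 2, prefix="Demo_", dropout=0.2,
                            activation="relu")(inp)
    demo = keras_models.Model(inputs=inp, outputs=out)
    # the block preserves spatial dims and channels, so the residual add is valid
    assert demo.output_shape == (None, 8, 8, 64)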
def block(tensor):
    x = tensor
    for i in range(num_convs - 1):
        x = bn_(x, i + 1)
        x = act_(x, i + 1)
        x = conv(x, i + 1)

    x = bn_(x, num_convs)
    x = act_(x, num_convs)

    if dropout is not None:
        x = klayers.Dropout(dropout, name=prefix + "dropout_%s" % num_convs)(x)

    x = conv(x, num_convs)

    if squeeze_excite:
        x = se_block(x, 3)

    x = klayers.add([tensor, x], name=prefix + "add")
    return x
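# se_block() is called above but not defined in this listing.  Here is a
# minimal squeeze-and-excitation sketch in the style of Hu et al. (2017);
# the function name, the ratio argument, and the channels-last layout are
# assumptions, not necessarily the project's implementation.
from keras import backend as K  # assumed module-level import

def se_block_sketch(tensor, ratio):
    channels = K.int_shape(tensor)[-1]
    # squeeze: global spatial average, one scalar per channel
    x = klayers.GlobalAveragePooling2D()(tensor)
    # excite: bottleneck MLP producing per-channel gates in (0, 1)
    x = klayers.Dense(channels // ratio, activation="relu")(x)
    x = klayers.Dense(channels, activation="sigmoid")(x)
    x = klayers.Reshape((1, 1, channels))(x)
    # rescale the block's feature maps channel-wise
    return klayers.multiply([tensor, x])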
def get_network_model(conf, generation_descr):
    assert isinstance(conf, confs.NNModelConfig)

    activation = 'leakyrelu' if conf.leaky_relu else 'relu'

    # inputs:
    if is_channels_first():
        inputs_board = klayers.Input(shape=(conf.input_channels,
                                            conf.input_columns,
                                            conf.input_rows),
                                     name="inputs_board")
    else:
        inputs_board = klayers.Input(shape=(conf.input_columns,
                                            conf.input_rows,
                                            conf.input_channels),
                                     name="inputs_board")

    # XXX config abuse:
    v2 = conf.residual_layers <= 0

    if v2:
        layer = klayers.Conv2D(conf.cnn_filter_size, 1,
                               padding="same",
                               use_bias=False,
                               name='initial-conv')(inputs_board)

        # XXX hard-coded dropout
        # XXX hard-coded layers
        # for convs in [1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1]:
        for i, c in enumerate([1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1]):
            layer = residual_block_v2(conf.cnn_filter_size,
                                      conf.cnn_kernel_size, c,
                                      prefix="ResLayer_%s_" % i,
                                      dropout=0.3,
                                      activation=activation)(layer)
    else:
        # initial conv2d/Resnet on coords
        layer = conv2d_block(conf.cnn_filter_size, conf.cnn_kernel_size,
                             activation=activation,
                             padding="same",
                             name='initial-conv')(inputs_board)

        # AG0 way:
        for i in range(conf.residual_layers):
            layer = residual_block_v1(conf.cnn_filter_size,
                                      conf.cnn_kernel_size,
                                      prefix="ResLayer_%s_" % i,
                                      activation=activation)(layer)

    # policy
    ########

    # similar to AG0, but with multiple policy heads
    assert conf.multiple_policies
    number_of_policies = conf.role_count
    assert number_of_policies == len(conf.policy_dist_count)

    policy_heads = []
    for idx, count in enumerate(conf.policy_dist_count):
        # residual net -> flattened for policy head
        # XXX 2, should be based on size of policy...
        to_flatten = conv2d_block(2, 1,
                                  name='to_flatten_policy_head_%s' % idx,
                                  activation=activation,
                                  padding='valid')(layer)

        flat = klayers.Flatten()(to_flatten)

        # output: policy head(s)
        if conf.dropout_rate_policy > 0:
            flat = klayers.Dropout(conf.dropout_rate_policy)(flat)

        head = klayers.Dense(count,
                             name="policy_%d" % idx,
                             activation="softmax")(flat)
        policy_heads.append(head)

    # value
    #######

    # XXX config abuse:
    if generation_descr.draw_head:
        num_value_heads = 3
    else:
        num_value_heads = 2

    value_v3 = conf.value_hidden_size == 0
    value_v2 = conf.value_hidden_size < 0

    if value_v3:
        assert conf.input_columns == conf.input_rows

        # repeatedly average-pool until the spatial dims drop below 5
        average_layer = layer
        dims = conf.input_columns
        while dims >= 5:
            if dims % 2 == 1:
                average_layer = klayers.AveragePooling2D(4, 1)(average_layer)
                dims -= 3
            else:
                average_layer = klayers.AveragePooling2D(2, 2)(average_layer)
                dims //= 2

        assert dims < conf.input_columns

        to_flatten1 = conv2d_block(32, 1,
                                   name='reward_flatten1',
                                   activation=activation,
                                   do_bn=False,
                                   padding='valid')(average_layer)

        to_flatten2 = conv2d_block(1, 1,
                                   name='reward_flatten2',
                                   activation=activation,
                                   do_bn=False,
                                   padding='valid')(layer)

        flat = klayers.concatenate([klayers.Flatten()(to_flatten1),
                                    klayers.Flatten()(to_flatten2)])

        if conf.dropout_rate_value > 0:
            flat = klayers.Dropout(conf.dropout_rate_value)(flat)

        hidden = klayers.Dense(256, name="value_hidden")(flat)
        hidden = act(hidden, 'crelu', name="value_hidden_act")

        value_head = klayers.Dense(num_value_heads,
                                   activation="sigmoid",
                                   name="value")(hidden)

    elif value_v2:
        assert conf.input_columns == conf.input_rows

        output_layer = layer
        dims = conf.input_columns
        while dims > 5:
            if dims % 2 == 1:
                output_layer = klayers.AveragePooling2D(4, 1)(output_layer)
                dims -= 3
            else:
                output_layer = klayers.AveragePooling2D(2, 2)(output_layer)
                dims //= 2

        # XXX 16 - hardcoded
        to_flatten = klayers.Conv2D(16, 1,
                                    name='to_flatten_value_head',
                                    padding='valid',
                                    activation=activation)(output_layer)

        if conf.dropout_rate_value > 0:
            to_flatten = klayers.Dropout(conf.dropout_rate_value)(to_flatten)

        flat = klayers.Flatten()(to_flatten)

        value_head = klayers.Dense(num_value_heads,
                                   activation="sigmoid",
                                   name="value")(flat)

    else:
        # old way, as per AG0
        to_flatten = conv2d_block(1, 1,
                                  name='to_flatten_value_head',
                                  padding='valid',
                                  activation=activation)(layer)

        flat = klayers.Flatten()(to_flatten)

        hidden = klayers.Dense(conf.value_hidden_size,
                               name="value_hidden_layer",
                               activation="relu")(flat)

        if conf.dropout_rate_value > 0:
            hidden = klayers.Dropout(conf.dropout_rate_value)(hidden)

        value_head = klayers.Dense(num_value_heads,
                                   activation="sigmoid",
                                   name="value")(hidden)

    # model:
    outputs = policy_heads + [value_head]
    model = keras_models.Model(inputs=[inputs_board], outputs=outputs)

    # add in weight decay?  XXX rename conf to reflect that it is weight decay,
    # and use a +ve value instead of a hard-coded one.
    # XXX this hasn't been tested
    if conf.l2_regularisation:
        for layer in model.layers:
            # XXX to get global weight decay in keras, regularizers have to be
            # added to every layer in the model.  In my models these layers are
            # batch normalization (beta/gamma regularizer) and
            # dense/convolution (W_regularizer/b_regularizer) layers.
            if hasattr(layer, 'kernel_regularizer'):
                # XXX too much?  Is it doubled from paper?  XXX 5e-3?
                layer.kernel_regularizer = keras_regularizers.l2(1e-4)

    return model
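# Note on the regularisation hack above (flagged "hasn't been tested"): in
# stock Keras, regularisation losses are registered when a layer builds its
# weights, so assigning kernel_regularizer on an already-built layer is a
# no-op.  A commonly used workaround is to set the attributes and then
# round-trip the model through its JSON config so they get picked up.  A
# minimal sketch, assuming only stock Keras layers are in the model:
def apply_weight_decay_sketch(model, l2=1e-4):
    for layer in model.layers:
        if hasattr(layer, 'kernel_regularizer'):
            layer.kernel_regularizer = keras_regularizers.l2(l2)

    # rebuild from config so the regularizers take effect, keeping weights
    weights = model.get_weights()
    model = keras_models.model_from_json(model.to_json())
    model.set_weights(weights)
    return model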
def get_network_model(conf, generation_descr):
    assert isinstance(conf, confs.NNModelConfig)

    activation = 'leakyrelu' if conf.leaky_relu else 'relu'

    # inputs:
    if is_channels_first():
        inputs_board = klayers.Input(shape=(conf.input_channels,
                                            conf.input_columns,
                                            conf.input_rows),
                                     name="inputs_board")
    else:
        inputs_board = klayers.Input(shape=(conf.input_columns,
                                            conf.input_rows,
                                            conf.input_channels),
                                     name="inputs_board")

    # choose between resnet_v2 and resnet_v1
    if conf.resnet_v2:
        all_layers = []

        # initial conv2d/Resnet on coords
        layer = conv2d_block(conf.cnn_filter_size, 1,
                             activation=activation,
                             padding="same",
                             name='initial-conv')(inputs_board)
        all_layers.append(layer)

        if conf.squeeze_excite_layers:
            squeeze_excite = True
            dropout = None
        else:
            squeeze_excite = False
            dropout = 0.2

        for i in range(conf.residual_layers):
            layer = residual_block_v2(conf.cnn_filter_size,
                                      conf.cnn_kernel_size, 2,
                                      prefix="ResLayer_%s_" % i,
                                      squeeze_excite=squeeze_excite,
                                      dropout=dropout,
                                      activation=activation)(layer)
            all_layers.append(layer)

    else:
        # AG0 way:
        assert not conf.squeeze_excite_layers

        # initial conv2d/Resnet on coords
        layer = conv2d_block(conf.cnn_filter_size, conf.cnn_kernel_size,
                             activation=activation,
                             padding="same",
                             name='initial-conv')(inputs_board)

        # layers
        for i in range(conf.residual_layers):
            layer = residual_block_v1(conf.cnn_filter_size,
                                      conf.cnn_kernel_size,
                                      prefix="ResLayer_%s_" % i,
                                      activation=activation)(layer)

    # policy
    ########

    # similar to AG0, but with multiple policy heads
    number_of_policies = conf.role_count
    assert number_of_policies == len(conf.policy_dist_count)

    policy_heads = []
    for idx, count in enumerate(conf.policy_dist_count):
        # residual net -> flattened for policy head
        # XXX 2, should be based on size of policy...
        to_flatten = conv2d_block(2, 1,
                                  name='to_flatten_policy_head_%s' % idx,
                                  activation=activation,
                                  padding='valid')(layer)

        flat = klayers.Flatten()(to_flatten)

        # output: policy head(s)
        if conf.dropout_rate_policy > 0:
            flat = klayers.Dropout(conf.dropout_rate_policy)(flat)

        head = klayers.Dense(count,
                             name="policy_%d" % idx,
                             activation="softmax")(flat)
        policy_heads.append(head)

    # value
    #######

    if generation_descr.draw_head:
        num_value_heads = 3
    else:
        num_value_heads = 2

    if conf.concat_all_layers:
        # note: concat_all_layers requires resnet_v2, since all_layers is only
        # populated on that path
        assert not conf.global_pooling_value

        all_to_flatten = []
        for idx, res_layer in enumerate(all_layers):
            to_flatten = conv2d_block(1, 1,
                                      name='value_flatten_%s' % idx,
                                      activation=activation,
                                      padding='valid')(res_layer)
            all_to_flatten.append(to_flatten)

        flat = klayers.concatenate([klayers.Flatten()(f)
                                    for f in all_to_flatten])

    elif conf.global_pooling_value:
        x = klayers.GlobalAveragePooling2D(name="value_average")(layer)
        to_flatten1 = klayers.Reshape((1, 1, conf.cnn_filter_size),
                                      name="value_reshape")(x)

        to_flatten2 = conv2d_block(1, 1,
                                   name='value_flatten',
                                   activation=activation,
                                   padding='valid')(layer)

        flat = klayers.concatenate([klayers.Flatten()(to_flatten1),
                                    klayers.Flatten()(to_flatten2)])

    else:
        # old way, as per AG0
        to_flatten = conv2d_block(1, 1,
                                  name='value_flatten',
                                  activation=activation,
                                  do_bn=False,
                                  padding='valid')(layer)

        flat = klayers.Flatten()(to_flatten)

    if conf.dropout_rate_value > 0:
        flat = klayers.Dropout(conf.dropout_rate_value)(flat)

    # NOTE: assumes `activation` is a string Keras can resolve here
    hidden = klayers.Dense(conf.value_hidden_size,
                           name="value_hidden",
                           activation=activation)(flat)

    value_head = klayers.Dense(num_value_heads,
                               activation='softmax',
                               name="value")(hidden)

    # model:
    outputs = policy_heads + [value_head]
    return keras_models.Model(inputs=[inputs_board], outputs=outputs)
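# Usage sketch for the builder above: the model has one softmax policy output
# per role plus one softmax value output, so it needs one loss per head.  The
# optimizer and loss choices here are illustrative assumptions, not this
# project's actual training configuration.
def compile_model_sketch(conf, generation_descr):
    model = get_network_model(conf, generation_descr)
    # one loss per policy head, plus one for the softmax value head
    losses = ["categorical_crossentropy"] * len(conf.policy_dist_count)
    losses.append("categorical_crossentropy")
    model.compile(loss=losses, optimizer="adam")
    return model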
def get_network_model(conf):
    assert isinstance(conf, confs.NNModelConfig)

    # fancy l2 regularizer stuff
    extra_params = {}
    if conf.l2_regularisation:
        extra_params["kernel_regularizer"] = keras_regularizers.l2(1e-4)

    activation = 'leakyrelu' if conf.leaky_relu else 'relu'

    # inputs:
    if is_channels_first():
        inputs_board = klayers.Input(shape=(conf.input_channels,
                                            conf.input_columns,
                                            conf.input_rows),
                                     name="inputs_board")
    else:
        inputs_board = klayers.Input(shape=(conf.input_columns,
                                            conf.input_rows,
                                            conf.input_channels),
                                     name="inputs_board")

    # initial conv2d/Resnet on coords
    layer = Conv2DBlock(conf.cnn_filter_size, conf.cnn_kernel_size,
                        padding='same',
                        activation=activation,
                        name='initial',
                        **extra_params)(inputs_board)

    for i in range(conf.residual_layers):
        layer = ResidualBlock(conf.cnn_filter_size,
                              conf.cnn_kernel_size,
                              name="ResLayer_%s" % i,
                              activation=activation,
                              **extra_params)(layer)

    number_of_policies = 1
    if conf.multiple_policies:
        number_of_policies = conf.role_count

    assert number_of_policies == len(conf.policy_dist_count)

    policy_heads = []
    for idx, count in enumerate(conf.policy_dist_count):
        # residual net -> flattened for policy head
        to_flatten = Conv2DBlock(2, 1,
                                 name='to_flatten_policy_head_%s' % idx,
                                 padding='valid',
                                 activation=activation,
                                 **extra_params)(layer)

        flat = klayers.Flatten()(to_flatten)

        # output: policy head(s)
        if conf.dropout_rate_policy > 0:
            flat = klayers.Dropout(conf.dropout_rate_policy)(flat)

        head = klayers.Dense(count,
                             activation="softmax",
                             name="policy_%d" % idx,
                             **extra_params)(flat)
        policy_heads.append(head)

    # residual net -> flattened for value head
    to_flatten = Conv2DBlock(1, 1,
                             name='to_flatten_value_head',
                             padding='valid',
                             activation=activation,
                             **extra_params)(layer)

    flat = klayers.Flatten()(to_flatten)

    # output: value head
    hidden = klayers.Dense(conf.value_hidden_size,
                           activation="relu",
                           name="value_hidden_layer",
                           **extra_params)(flat)

    if conf.dropout_rate_value > 0:
        hidden = klayers.Dropout(conf.dropout_rate_value)(hidden)

    value_head = klayers.Dense(conf.role_count,
                               activation="sigmoid",
                               name="value",
                               **extra_params)(hidden)

    # model:
    outputs = policy_heads + [value_head]
    return keras_models.Model(inputs=[inputs_board], outputs=outputs)
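# Illustrative construction of the class-based variant above.  This assumes
# NNModelConfig is a plain attribute container that can be built up field by
# field; every value below is made up for the example.
def build_example_model():
    conf = confs.NNModelConfig()
    conf.input_channels, conf.input_columns, conf.input_rows = 12, 8, 8
    conf.cnn_filter_size, conf.cnn_kernel_size = 64, 3
    conf.residual_layers = 5
    conf.multiple_policies = True
    conf.role_count = 2
    conf.policy_dist_count = [128, 128]
    conf.value_hidden_size = 256
    conf.dropout_rate_policy = 0.25
    conf.dropout_rate_value = 0.5
    conf.leaky_relu = False
    conf.l2_regularisation = False

    model = get_network_model(conf)
    # two policy heads (one per role) plus one value head
    assert len(model.outputs) == 3
    return model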