Example #1
    def block(tensor):
        if simple:
            x = conv(tensor, "conv")
            x = bn_(x, "bn")

            if dropout:
                x = klayers.Dropout(dropout)(x)

        else:
            x = conv(tensor, "conv0")
            x = bn_(x, "bn0")
            x = act_(x, "act0")

            x = conv(x, "conv1", kernel_size=1)
            x = bn_(x, "bn1")
            x = act_(x, "act1")

            if dropout:
                x = klayers.Dropout(dropout)(x)

            x = conv(x, "conv2")
            x = bn_(x, "bn2")

        x = klayers.add([tensor, x], name=prefix + "add")
        return act_(x, "act_final")
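
The `block` closure above leans on `conv`, `bn_`, `act_`, `prefix` and `dropout` being bound by an enclosing factory that the snippet omits. A hypothetical minimal version of that factory, in the same style (names and defaults are illustrative, not the project's actual code; the later examples' factories would format integer indices into the layer names, e.g. prefix + "bn%s" % i):

    from keras import layers as klayers

    def residual_block_v1(filters, kernel_size, prefix="",
                          dropout=0.0, activation="relu"):
        # conv/bn_/act_ close over filters, kernel_size and prefix, so
        # the inner block body reads exactly like the example above
        def conv(x, name, kernel_size=kernel_size):
            return klayers.Conv2D(filters, kernel_size, padding="same",
                                  use_bias=False, name=prefix + name)(x)

        def bn_(x, name):
            return klayers.BatchNormalization(name=prefix + name)(x)

        def act_(x, name):
            return klayers.Activation(activation, name=prefix + name)(x)

        def block(tensor):
            # only the "simple" branch of Example #1, for brevity
            x = conv(tensor, "conv")
            x = bn_(x, "bn")
            if dropout:
                x = klayers.Dropout(dropout)(x)
            x = klayers.add([tensor, x], name=prefix + "add")
            return act_(x, "act_final")

        return block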
Example #2
    def block(tensor):
        x = tensor
        for i in range(num_convs - 1):
            x = bn_(x, i + 1)
            x = act_(x, i + 1)
            x = conv(x, i + 1)

        x = bn_(x, num_convs)
        x = act_(x, num_convs)

        if dropout > 0:
            x = klayers.Dropout(dropout)(x)

        x = conv(x, num_convs)
        x = klayers.add([tensor, x], name=prefix + "add")

        return x
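
Note the ordering in this variant: batch norm and activation precede each convolution (bn -> act -> conv), the pre-activation scheme of ResNet v2, whereas Example #1 applies conv -> bn -> act; accordingly the residual sum here is returned without a trailing activation.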
Example #3
    def block(tensor):
        x = tensor
        for i in range(num_convs - 1):
            x = bn_(x, i + 1)
            x = act_(x, i + 1)
            x = conv(x, i + 1)

        x = bn_(x, num_convs)
        x = act_(x, num_convs)

        if dropout is not None:
            x = klayers.Dropout(dropout,
                                name=prefix + "dropout_%s" % num_convs)(x)

        x = conv(x, num_convs)

        if squeeze_excite:
            x = se_block(x, 3)

        x = klayers.add([tensor, x], name=prefix + "add")

        return x
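
Example #3 additionally routes the output through an `se_block` helper that is not shown. A minimal squeeze-and-excite sketch in the same style, assuming channels-last tensors and a hypothetical `se_block(tensor, ratio)` signature; the project's actual helper may differ:

    from keras import backend as K
    from keras import layers as klayers

    def se_block(tensor, ratio):
        # squeeze: collapse each feature map to its global spatial average
        channels = K.int_shape(tensor)[-1]
        x = klayers.GlobalAveragePooling2D()(tensor)
        # excite: bottleneck MLP producing one (0, 1) gate per channel
        x = klayers.Dense(channels // ratio, activation="relu")(x)
        x = klayers.Dense(channels, activation="sigmoid")(x)
        # broadcast the gates over the spatial dims and rescale the input
        x = klayers.Reshape((1, 1, channels))(x)
        return klayers.multiply([tensor, x])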
Example #4
def get_network_model(conf, generation_descr):
    assert isinstance(conf, confs.NNModelConfig)

    activation = 'leakyrelu' if conf.leaky_relu else 'relu'

    # inputs:
    if is_channels_first():
        inputs_board = klayers.Input(shape=(conf.input_channels,
                                            conf.input_columns,
                                            conf.input_rows),
                                     name="inputs_board")
    else:
        inputs_board = klayers.Input(shape=(conf.input_columns,
                                            conf.input_rows,
                                            conf.input_channels),
                                     name="inputs_board")

    # XXX config abuse:
    v2 = conf.residual_layers <= 0
    if v2:
        layer = klayers.Conv2D(conf.cnn_filter_size,
                               1,
                               padding="same",
                               use_bias=False,
                               name='initial-conv')(inputs_board)

        # XXX hard coding dropout
        # XXX hard coding layers
        #for convs in [1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1]:
        for i, c in enumerate(
            [1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1]):
            layer = residual_block_v2(conf.cnn_filter_size,
                                      conf.cnn_kernel_size,
                                      c,
                                      prefix="ResLayer_%s_" % i,
                                      dropout=0.3,
                                      activation=activation)(layer)

    else:
        # initial conv2d/Resnet on coords
        layer = conv2d_block(conf.cnn_filter_size,
                             conf.cnn_kernel_size,
                             activation=activation,
                             padding="same",
                             name='initial-conv')(inputs_board)

        # AG0 way:
        for i in range(conf.residual_layers):
            layer = residual_block_v1(conf.cnn_filter_size,
                                      conf.cnn_kernel_size,
                                      prefix="ResLayer_%s_" % i,
                                      activation=activation)(layer)

    # policy
    ########
    # similar to AG0, but with multiple policy heads
    assert conf.multiple_policies
    number_of_policies = conf.role_count
    assert number_of_policies == len(conf.policy_dist_count)

    policy_heads = []
    for idx, count in enumerate(conf.policy_dist_count):
        # residual net -> flattened for policy head
        # XXX 2, should be based on size of policy...
        to_flatten = conv2d_block(2,
                                  1,
                                  name='to_flatten_policy_head_%s' % idx,
                                  activation=activation,
                                  padding='valid')(layer)

        flat = klayers.Flatten()(to_flatten)

        # output: policy head(s)
        if conf.dropout_rate_policy > 0:
            flat = klayers.Dropout(conf.dropout_rate_policy)(flat)

        head = klayers.Dense(count,
                             name="policy_%d" % idx,
                             activation="softmax")(flat)

        policy_heads.append(head)

    # value
    #######
    # XXX config abuse:

    if generation_descr.draw_head:
        num_value_heads = 3
    else:
        num_value_heads = 2

    value_v3 = conf.value_hidden_size == 0
    value_v2 = conf.value_hidden_size < 0
    if value_v3:
        assert conf.input_columns == conf.input_rows
        average_layer = layer
        dims = conf.input_columns
        while dims >= 5:
            if dims % 2 == 1:
                # odd dims: pool_size 4 with stride 1 shrinks each side by 3
                average_layer = klayers.AveragePooling2D(4, 1)(average_layer)
                dims -= 3
            else:
                # even dims: pool_size 2 with stride 2 halves each side
                average_layer = klayers.AveragePooling2D(2, 2)(average_layer)
                dims //= 2

        assert dims < conf.input_columns

        to_flatten1 = conv2d_block(32,
                                   1,
                                   name='reward_flatten1',
                                   activation=activation,
                                   do_bn=False,
                                   padding='valid')(average_layer)

        to_flatten2 = conv2d_block(1,
                                   1,
                                   name='reward_flatten2',
                                   activation=activation,
                                   do_bn=False,
                                   padding='valid')(layer)

        flat = klayers.concatenate(
            [klayers.Flatten()(to_flatten1),
             klayers.Flatten()(to_flatten2)])

        if conf.dropout_rate_value > 0:
            flat = klayers.Dropout(conf.dropout_rate_value)(flat)

        hidden = klayers.Dense(256, name="value_hidden")(flat)
        hidden = act(hidden, 'crelu', name="value_hidden_act")

        value_head = klayers.Dense(num_value_heads,
                                   activation="sigmoid",
                                   name="value")(hidden)

    elif value_v2:
        assert conf.input_columns == conf.input_rows
        output_layer = layer
        dims = conf.input_columns
        while dims > 5:
            if dims % 2 == 1:
                # odd dims: pool_size 4 with stride 1 shrinks each side by 3
                output_layer = klayers.AveragePooling2D(4, 1)(output_layer)
                dims -= 3
            else:
                # even dims: pool_size 2 with stride 2 halves each side
                output_layer = klayers.AveragePooling2D(2, 2)(output_layer)
                dims //= 2

        # XXX 16 - hardcoded
        to_flatten = klayers.Conv2D(16,
                                    1,
                                    name='to_flatten_value_head',
                                    padding='valid',
                                    activation=activation)(output_layer)

        if conf.dropout_rate_value > 0:
            to_flatten = klayers.Dropout(conf.dropout_rate_value)(to_flatten)

        flat = klayers.Flatten()(to_flatten)

        value_head = klayers.Dense(num_value_heads,
                                   activation="sigmoid",
                                   name="value")(flat)

    else:
        # old way, as per AG0
        to_flatten = conv2d_block(1,
                                  1,
                                  name='to_flatten_value_head',
                                  padding='valid',
                                  activation=activation)(layer)
        flat = klayers.Flatten()(to_flatten)

        hidden = klayers.Dense(conf.value_hidden_size,
                               name="value_hidden_layer",
                               activation="relu")(flat)

        if conf.dropout_rate_value > 0:
            hidden = klayers.Dropout(conf.dropout_rate_value)(hidden)

        value_head = klayers.Dense(num_value_heads,
                                   activation="sigmoid",
                                   name="value")(hidden)

    # model:
    outputs = policy_heads + [value_head]

    model = keras_models.Model(inputs=[inputs_board], outputs=outputs)

    # add in weight decay?  XXX rename conf to reflect it is weight decay and use +ve value instead
    # of hard coded value.
    # XXX this hasn't been tested

    if conf.l2_regularisation:
        for layer in model.layers:
            # XXX To get global weight decay in keras regularizers have to be added to every layer
            # in the model. In my models these layers are batch normalization (beta/gamma
            # regularizer) and dense/convolutions (W_regularizer/b_regularizer) layers.

            if hasattr(layer, 'kernel_regularizer'):
                # XXX too much?  Is it doubled from paper?  XXX 5e-3 ?
                layer.kernel_regularizer = keras_regularizers.l2(1e-4)

    return model
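
As the comments above concede, assigning `layer.kernel_regularizer` on an already-built model does not register any regularisation loss in Keras: the attribute only takes effect when the layer is constructed, which is the route Example #6 takes via `extra_params`. One known workaround for a built model is to round-trip it through its config, a sketch (the `add_weight_decay` helper name is hypothetical; weights are carried across with get/set_weights):

    from keras import models as keras_models
    from keras import regularizers as keras_regularizers

    def add_weight_decay(model, l2=1e-4):
        # mutate the attribute, then rebuild from config so every layer
        # is re-created with the regulariser actually attached
        for layer in model.layers:
            if hasattr(layer, "kernel_regularizer"):
                layer.kernel_regularizer = keras_regularizers.l2(l2)
        weights = model.get_weights()
        rebuilt = keras_models.Model.from_config(model.get_config())
        rebuilt.set_weights(weights)
        return rebuilt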
Example #5
def get_network_model(conf, generation_descr):
    assert isinstance(conf, confs.NNModelConfig)

    activation = 'leakyrelu' if conf.leaky_relu else 'relu'

    # inputs:
    if is_channels_first():
        inputs_board = klayers.Input(shape=(conf.input_channels,
                                            conf.input_columns,
                                            conf.input_rows),
                                     name="inputs_board")
    else:
        inputs_board = klayers.Input(shape=(conf.input_columns,
                                            conf.input_rows,
                                            conf.input_channels),
                                     name="inputs_board")

    # choose between resnet_v2 and resnet_v1
    if conf.resnet_v2:
        all_layers = []

        # initial conv2d/Resnet on coords
        layer = conv2d_block(conf.cnn_filter_size,
                             1,
                             activation=activation,
                             padding="same",
                             name='initial-conv')(inputs_board)

        all_layers.append(layer)

        if conf.squeeze_excite_layers:
            squeeze_excite = True
            dropout = None
        else:
            squeeze_excite = False
            dropout = 0.2

        for i in range(conf.residual_layers):
            layer = residual_block_v2(conf.cnn_filter_size,
                                      conf.cnn_kernel_size,
                                      2,
                                      prefix="ResLayer_%s_" % i,
                                      squeeze_excite=squeeze_excite,
                                      dropout=dropout,
                                      activation=activation)(layer)
            all_layers.append(layer)

    else:
        # AG0 way:
        assert not conf.squeeze_excite_layers

        # initial conv2d/Resnet on coords
        layer = conv2d_block(conf.cnn_filter_size,
                             conf.cnn_kernel_size,
                             activation=activation,
                             padding="same",
                             name='initial-conv')(inputs_board)

        # layers
        for i in range(conf.residual_layers):
            layer = residual_block_v1(conf.cnn_filter_size,
                                      conf.cnn_kernel_size,
                                      prefix="ResLayer_%s_" % i,
                                      activation=activation)(layer)

    # policy
    ########
    # similar to AG0, but with multiple policy heads
    number_of_policies = conf.role_count
    assert number_of_policies == len(conf.policy_dist_count)

    policy_heads = []
    for idx, count in enumerate(conf.policy_dist_count):
        # residual net -> flattened for policy head
        # XXX 2, should be based on size of policy...
        to_flatten = conv2d_block(2,
                                  1,
                                  name='to_flatten_policy_head_%s' % idx,
                                  activation=activation,
                                  padding='valid')(layer)

        flat = klayers.Flatten()(to_flatten)

        # output: policy head(s)
        if conf.dropout_rate_policy > 0:
            flat = klayers.Dropout(conf.dropout_rate_policy)(flat)

        head = klayers.Dense(count,
                             name="policy_%d" % idx,
                             activation="softmax")(flat)

        policy_heads.append(head)

    # value
    #######

    if generation_descr.draw_head:
        num_value_heads = 3
    else:
        num_value_heads = 2

    if conf.concat_all_layers:
        assert not conf.global_pooling_value

        all_to_flatten = []
        for idx, layer in enumerate(all_layers):
            to_flatten = conv2d_block(1,
                                      1,
                                      name='value_flatten_%s' % idx,
                                      activation=activation,
                                      padding='valid')(layer)
            all_to_flatten.append(to_flatten)
        flat = klayers.concatenate(
            [klayers.Flatten()(f) for f in all_to_flatten])

    elif conf.global_pooling_value:
        x = klayers.GlobalAveragePooling2D(name="value_average")(layer)
        to_flatten1 = klayers.Reshape((1, 1, conf.cnn_filter_size),
                                      name="value_reshape")(x)
        to_flatten2 = conv2d_block(1,
                                   1,
                                   name='value_flatten',
                                   activation=activation,
                                   padding='valid')(layer)

        flat = klayers.concatenate(
            [klayers.Flatten()(to_flatten1),
             klayers.Flatten()(to_flatten2)])

    else:
        # old way, as per AG0
        to_flatten = conv2d_block(1,
                                  1,
                                  name='value_flatten',
                                  activation=activation,
                                  do_bn=False,
                                  padding='valid')(layer)
        flat = klayers.Flatten()(to_flatten)

    if conf.dropout_rate_value > 0:
        flat = klayers.Dropout(conf.dropout_rate_value)(flat)

    hidden = klayers.Dense(conf.value_hidden_size,
                           name="value_hidden",
                           activation=activation)(flat)

    value_head = klayers.Dense(num_value_heads,
                               activation='softmax',
                               name="value")(hidden)

    # model:
    outputs = policy_heads + [value_head]

    return keras_models.Model(inputs=[inputs_board], outputs=outputs)
Example #6
File: model.py  Project: vipmath/ggp-zero
def get_network_model(conf):
    assert isinstance(conf, confs.NNModelConfig)

    # fancy l2 regularizer stuff
    extra_params = {}
    if conf.l2_regularisation:
        extra_params["kernel_regularizer"] = keras_regularizers.l2(1e-4)

    activation = 'leakyrelu' if conf.leaky_relu else 'relu'

    # inputs:
    if is_channels_first():
        inputs_board = klayers.Input(shape=(conf.input_channels,
                                            conf.input_columns,
                                            conf.input_rows),
                                     name="inputs_board")
    else:
        inputs_board = klayers.Input(shape=(conf.input_columns,
                                            conf.input_rows,
                                            conf.input_channels),
                                     name="inputs_board")

    # initial conv2d/Resnet on coords
    layer = Conv2DBlock(conf.cnn_filter_size,
                        conf.cnn_kernel_size,
                        padding='same',
                        activation=activation,
                        name='initial',
                        **extra_params)(inputs_board)

    for i in range(conf.residual_layers):
        layer = ResidualBlock(conf.cnn_filter_size,
                              conf.cnn_kernel_size,
                              name="ResLayer_%s" % i,
                              activation=activation,
                              **extra_params)(layer)

    number_of_policies = 1
    if conf.multiple_policies:
        number_of_policies = conf.role_count

    assert number_of_policies == len(conf.policy_dist_count)

    policy_heads = []
    for idx, count in enumerate(conf.policy_dist_count):
        # residual net -> flattened for policy head
        to_flatten = Conv2DBlock(2,
                                 1,
                                 name='to_flatten_policy_head_%s' % idx,
                                 padding='valid',
                                 activation=activation,
                                 **extra_params)(layer)

        flat = klayers.Flatten()(to_flatten)

        # output: policy head(s)
        if conf.dropout_rate_policy > 0:
            flat = klayers.Dropout(conf.dropout_rate_policy)(flat)

        head = klayers.Dense(count,
                             activation="softmax",
                             name="policy_%d" % idx,
                             **extra_params)(flat)

        policy_heads.append(head)

    # residual net -> flattened for value head
    to_flatten = Conv2DBlock(1,
                             1,
                             name='to_flatten_value_head',
                             padding='valid',
                             activation=activation,
                             **extra_params)(layer)

    flat = klayers.Flatten()(to_flatten)

    # output: value head
    hidden = klayers.Dense(conf.value_hidden_size,
                           activation="relu",
                           name="value_hidden_layer",
                           **extra_params)(flat)

    if conf.dropout_rate_value > 0:
        hidden = klayers.Dropout(conf.dropout_rate_value)(hidden)

    value_head = klayers.Dense(conf.role_count,
                               activation="sigmoid",
                               name="value",
                               **extra_params)(hidden)

    # model:
    outputs = policy_heads + [value_head]
    return keras_models.Model(inputs=[inputs_board], outputs=outputs)
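
A hypothetical usage sketch for these builders, with illustrative loss choices (all policy heads end in softmax; the value head is sigmoid in Examples #4 and #6 but softmax in Example #5):

    model = get_network_model(conf)  # Examples #4 and #5 also take generation_descr
    model.summary()

    # one loss per policy head, plus one for the value head
    losses = ["categorical_crossentropy"] * len(conf.policy_dist_count)
    losses.append("mean_squared_error")  # illustrative value-head loss
    model.compile(optimizer="adam", loss=losses)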