Example #1
File: dsin.py  Project: zlin-zou/DeepCTR
def DSIN(dnn_feature_columns, sess_feature_list, sess_max_count=5, bias_encoding=False,
         att_embedding_size=1, att_head_num=8, dnn_hidden_units=(200, 80), dnn_activation='sigmoid', dnn_dropout=0,
         dnn_use_bn=False, l2_reg_dnn=0, l2_reg_embedding=1e-6, seed=1024, task='binary',
         ):
    """Instantiates the Deep Session Interest Network architecture.

    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
    :param sess_feature_list: list, to indicate the sequence sparse fields.
    :param sess_max_count: positive int, to indicate the max number of sessions.
    :param bias_encoding: bool. Whether to use bias encoding or positional encoding.
    :param att_embedding_size: positive int, the embedding size of each attention head.
    :param att_head_num: positive int, the number of attention heads.
    :param dnn_hidden_units: list of positive integers (or an empty list), the layer number and units in each layer of the deep net.
    :param dnn_activation: Activation function to use in the deep net.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the deep net.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors.
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :return: A Keras model instance.

    """

    hist_emb_size = sum(
        map(lambda fc: fc.embedding_dim, filter(lambda fc: fc.name in sess_feature_list, dnn_feature_columns)))

    if att_embedding_size * att_head_num != hist_emb_size:
        raise ValueError(
            "hist_emb_size must be equal to att_embedding_size * att_head_num, got %d != %d * %d" % (
                hist_emb_size, att_embedding_size, att_head_num))

    features = build_input_features(dnn_feature_columns)

    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat), dnn_feature_columns)) if dnn_feature_columns else []
    dense_feature_columns = list(
        filter(lambda x: isinstance(x, DenseFeat), dnn_feature_columns)) if dnn_feature_columns else []
    varlen_sparse_feature_columns = list(
        filter(lambda x: isinstance(x, VarLenSparseFeat), dnn_feature_columns)) if dnn_feature_columns else []

    sparse_varlen_feature_columns = []
    history_fc_names = list(map(lambda x: "sess" + x, sess_feature_list))
    for fc in varlen_sparse_feature_columns:
        if fc.name not in history_fc_names:
            sparse_varlen_feature_columns.append(fc)

    inputs_list = list(features.values())

    user_behavior_input_dict = {}
    for idx in range(sess_max_count):
        sess_input = OrderedDict()
        for i, feat in enumerate(sess_feature_list):
            sess_input[feat] = features["sess_" + str(idx) + "_" + feat]

        user_behavior_input_dict["sess_" + str(idx)] = sess_input

    user_sess_length = Input(shape=(1,), name='sess_length')

    embedding_dict = {feat.embedding_name: Embedding(feat.vocabulary_size, feat.embedding_dim,
                                                     embeddings_initializer=feat.embeddings_initializer,
                                                     embeddings_regularizer=l2(
                                                         l2_reg_embedding),
                                                     name='sparse_emb_' +
                                                          str(i) + '-' + feat.name,
                                                     mask_zero=(feat.name in sess_feature_list)) for i, feat in
                      enumerate(sparse_feature_columns)}

    query_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns, sess_feature_list,
                                      sess_feature_list, to_list=True)
    dnn_input_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
                                          mask_feat_list=sess_feature_list, to_list=True)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    query_emb = concat_func(query_emb_list, mask=True)

    dnn_input_emb = Flatten()(concat_func(dnn_input_emb_list))

    tr_input = sess_interest_division(embedding_dict, user_behavior_input_dict, sparse_feature_columns,
                                      sess_feature_list, sess_max_count, bias_encoding=bias_encoding)

    Self_Attention = Transformer(att_embedding_size, att_head_num, dropout_rate=0, use_layer_norm=False,
                                 use_positional_encoding=(not bias_encoding), seed=seed, supports_masking=True,
                                 blinding=True)
    sess_fea = sess_interest_extractor(
        tr_input, sess_max_count, Self_Attention)

    interest_attention_layer = AttentionSequencePoolingLayer(att_hidden_units=(64, 16), weight_normalization=True,
                                                             supports_masking=False)(
        [query_emb, sess_fea, user_sess_length])

    lstm_outputs = BiLSTM(hist_emb_size,
                          layers=2, res_layers=0, dropout_rate=0.2, )(sess_fea)
    lstm_attention_layer = AttentionSequencePoolingLayer(att_hidden_units=(64, 16), weight_normalization=True)(
        [query_emb, lstm_outputs, user_sess_length])

    dnn_input_emb = Concatenate()(
        [dnn_input_emb, Flatten()(interest_attention_layer), Flatten()(lstm_attention_layer)])

    dnn_input_emb = combined_dnn_input([dnn_input_emb], dense_value_list)
    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input_emb)
    output = Dense(1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(output)
    output = PredictionLayer(task)(output)

    sess_input_list = []
    for i in range(sess_max_count):
        sess_name = "sess_" + str(i)
        sess_input_list.extend(get_inputs_list(
            [user_behavior_input_dict[sess_name]]))

    model = Model(inputs=inputs_list + [user_sess_length], outputs=output)

    return model
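A minimal usage sketch for this interface. The feature names and sizes are hypothetical, and the `SparseFeat`/`VarLenSparseFeat` import path and constructor signatures vary across DeepCTR versions, so treat them as assumptions:

from deepctr.feature_column import SparseFeat, VarLenSparseFeat

# One behavior column per session slot, named "sess_<i>_<feat>" as the
# model expects when it reads features["sess_0_item"], etc.
feature_columns = [
    SparseFeat('user', vocabulary_size=100, embedding_dim=8),
    SparseFeat('item', vocabulary_size=1000, embedding_dim=8),
]
feature_columns += [
    VarLenSparseFeat(SparseFeat('sess_%d_item' % i, vocabulary_size=1000,
                                embedding_dim=8, embedding_name='item'),
                     maxlen=10)
    for i in range(5)
]

# The check above requires att_embedding_size * att_head_num to equal the
# total embedding size of the session features (1 * 8 == 8 here).
model = DSIN(feature_columns, ['item'], sess_max_count=5,
             att_embedding_size=1, att_head_num=8)
model.compile('adam', 'binary_crossentropy')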
Example #2
File: ncf.py  Project: zldeng/DeepMatch
def NCF(user_feature_columns,
        item_feature_columns,
        user_gmf_embedding_dim=20,
        item_gmf_embedding_dim=20,
        user_mlp_embedding_dim=20,
        item_mlp_embedding_dim=20,
        dnn_use_bn=False,
        dnn_hidden_units=(64, 16),
        dnn_activation='relu',
        l2_reg_dnn=0,
        l2_reg_embedding=1e-6,
        dnn_dropout=0,
        init_std=0.0001,
        seed=1024):
    """Instantiates the NCF Model architecture.

    :param user_feature_columns: A dict containing the user's features and their dimensions.
    :param item_feature_columns: A dict containing the item's features and their dimensions.
    :param user_gmf_embedding_dim: int.
    :param item_gmf_embedding_dim: int.
    :param user_mlp_embedding_dim: int.
    :param item_mlp_embedding_dim: int.
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the deep net.
    :param dnn_hidden_units: list of positive integers (or an empty list), the layer number and units in each layer of the deep net.
    :param dnn_activation: Activation function to use in the deep net.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float, to use as the initial std of embedding vectors.
    :param seed: integer, to use as random seed.
    :return: A Keras model instance.

    """

    # GMF multiplies the user and item vectors element-wise, so their total
    # widths must match: rescale both embedding dims so that
    # len(user) * user_dim == len(item) * item_dim == lcm(user_dim, item_dim).
    user_dim = len(user_feature_columns) * user_gmf_embedding_dim
    item_dim = len(item_feature_columns) * item_gmf_embedding_dim
    dim = (user_dim * item_dim) // math.gcd(user_dim, item_dim)  # lcm
    user_gmf_embedding_dim = dim // len(user_feature_columns)
    item_gmf_embedding_dim = dim // len(item_feature_columns)

    # Generalized Matrix Factorization (GMF) Part
    user_gmf_feature_columns = [
        SparseFeat(feat,
                   vocabulary_size=size,
                   embedding_dim=user_gmf_embedding_dim)
        for feat, size in user_feature_columns.items()
    ]
    user_features = build_input_features(user_gmf_feature_columns)
    user_inputs_list = list(user_features.values())
    user_gmf_sparse_embedding_list, user_gmf_dense_value_list = input_from_feature_columns(
        user_features,
        user_gmf_feature_columns,
        l2_reg_embedding,
        init_std,
        seed,
        prefix='gmf_')
    user_gmf_input = combined_dnn_input(user_gmf_sparse_embedding_list, [])
    user_gmf_out = Lambda(lambda x: x,
                          name="user_gmf_embedding")(user_gmf_input)

    item_gmf_feature_columns = [
        SparseFeat(feat,
                   vocabulary_size=size,
                   embedding_dim=item_gmf_embedding_dim)
        for feat, size in item_feature_columns.items()
    ]
    item_features = build_input_features(item_gmf_feature_columns)
    item_inputs_list = list(item_features.values())
    item_gmf_sparse_embedding_list, item_gmf_dense_value_list = input_from_feature_columns(
        item_features,
        item_gmf_feature_columns,
        l2_reg_embedding,
        init_std,
        seed,
        prefix='gmf_')
    item_gmf_input = combined_dnn_input(item_gmf_sparse_embedding_list, [])
    item_gmf_out = Lambda(lambda x: x,
                          name="item_gmf_embedding")(item_gmf_input)

    gmf_out = Multiply()([user_gmf_out, item_gmf_out])

    # Multi-Layer Perceptron (MLP) Part
    user_mlp_feature_columns = [
        SparseFeat(feat,
                   vocabulary_size=size,
                   embedding_dim=user_mlp_embedding_dim)
        for feat, size in user_feature_columns.items()
    ]
    user_mlp_sparse_embedding_list, user_mlp_dense_value_list = input_from_feature_columns(
        user_features,
        user_mlp_feature_columns,
        l2_reg_embedding,
        init_std,
        seed,
        prefix='mlp_')
    user_mlp_input = combined_dnn_input(user_mlp_sparse_embedding_list,
                                        user_mlp_dense_value_list)
    user_mlp_out = Lambda(lambda x: x,
                          name="user_mlp_embedding")(user_mlp_input)

    item_mlp_feature_columns = [
        SparseFeat(feat,
                   vocabulary_size=size,
                   embedding_dim=item_mlp_embedding_dim)
        for feat, size in item_feature_columns.items()
    ]

    item_mlp_sparse_embedding_list, item_mlp_dense_value_list = input_from_feature_columns(
        item_features,
        item_mlp_feature_columns,
        l2_reg_embedding,
        init_std,
        seed,
        prefix='mlp_')
    item_mlp_input = combined_dnn_input(item_mlp_sparse_embedding_list,
                                        item_mlp_dense_value_list)
    item_mlp_out = Lambda(lambda x: x,
                          name="item_mlp_embedding")(item_mlp_input)

    mlp_input = Concatenate(axis=1)([user_mlp_out, item_mlp_out])
    mlp_out = DNN(dnn_hidden_units,
                  dnn_activation,
                  l2_reg_dnn,
                  dnn_dropout,
                  dnn_use_bn,
                  seed,
                  name="mlp_embedding")(mlp_input)

    # Fusion of GMF and MLP
    neumf_input = Concatenate(axis=1)([gmf_out, mlp_out])
    neumf_out = DNN(hidden_units=[1], activation='sigmoid')(neumf_input)
    output = Lambda(lambda x: x, name='neumf_out')(neumf_out)

    # output = PredictionLayer(task, False)(neumf_out)

    model = Model(inputs=user_inputs_list + item_inputs_list, outputs=output)

    return model
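Unlike the DeepCTR-style models above, this NCF builder takes plain dicts mapping feature names to vocabulary sizes. A hypothetical call (names and sizes are illustrative):

user_feature_columns = {'user_id': 100, 'user_age': 10}
item_feature_columns = {'item_id': 200}

model = NCF(user_feature_columns, item_feature_columns,
            user_gmf_embedding_dim=20, item_gmf_embedding_dim=20,
            user_mlp_embedding_dim=20, item_mlp_embedding_dim=20)
model.compile('adam', 'binary_crossentropy')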
Example #3
def cifar100_complicated_ensemble_v2(
        input_shape=None,
        input_tensor=None,
        n_classes=None,
        weights_path: Union[None, str] = None) -> Model:
    """
    Defines a CIFAR-100 network.

    :param n_classes: used in order to be compatible with the main script.
    :param input_shape: the input shape of the network. Can be omitted if input_tensor is used.
    :param input_tensor: the input tensor of the network. Can be omitted if input_shape is used.
    :param weights_path: a path to a trained custom network's weights.
    :return: Keras functional API Model.
    """
    outputs_list = []
    inputs = create_inputs(input_shape, input_tensor)

    # Generate Submodels.
    submodel1 = cifar100_complicated_ensemble_v2_submodel1(
        input_shape, input_tensor, 41, weights_path)
    submodel2 = cifar100_complicated_ensemble_v2_submodel2(
        input_shape, input_tensor, 41, weights_path)
    submodel3 = cifar100_complicated_ensemble_v2_submodel3(
        input_shape, input_tensor, 41, weights_path)
    submodel4 = cifar100_complicated_ensemble_v2_submodel4(
        input_shape, input_tensor, 41, weights_path)
    submodel5 = cifar100_complicated_ensemble_v2_submodel5(
        input_shape, input_tensor, 41, weights_path)

    # Get their outputs.
    outputs_submodel1 = submodel1(inputs)
    outputs_submodel2 = submodel2(inputs)
    outputs_submodel3 = submodel3(inputs)
    outputs_submodel4 = submodel4(inputs)
    outputs_submodel5 = submodel5(inputs)

    # Trim submodel 2-5 outputs: drop the first (extra) logit of each.
    outputs_submodel2 = Crop(1, 1,
                             outputs_submodel2.shape[1])(outputs_submodel2)
    outputs_submodel3 = Crop(1, 1,
                             outputs_submodel3.shape[1])(outputs_submodel3)
    outputs_submodel4 = Crop(1, 1,
                             outputs_submodel4.shape[1])(outputs_submodel4)
    outputs_submodel5 = Crop(1, 1,
                             outputs_submodel5.shape[1])(outputs_submodel5)

    # Create the complicated outputs.
    # Classes 0-9.
    outputs_list.append(
        Average(name='classes_0-9')([
            Crop(1, 0, 10)(outputs_submodel1),
            Crop(1, 10, 20)(outputs_submodel5)
        ]))

    # Classes 10-39.
    outputs_list.append(
        Average(name='classes_10-39')([
            Crop(1, 10, 40)(outputs_submodel1),
            Crop(1, 0, 30)(outputs_submodel2)
        ]))

    # Classes 40-49.
    outputs_list.append(
        Average(name='classes_40-49')([
            Crop(1, 30, 40)(outputs_submodel2),
            Crop(1, 0, 10)(outputs_submodel3)
        ]))

    # Classes 50-59.
    outputs_list.append(
        Average(name='classes_50-59')([
            Crop(1, 10, 20)(outputs_submodel3),
            Crop(1, 0, 10)(outputs_submodel5)
        ]))

    # Classes 60-79.
    outputs_list.append(
        Average(name='classes_60-79')([
            Crop(1, 20, 40)(outputs_submodel3),
            Crop(1, 0, 20)(outputs_submodel4)
        ]))

    # Classes 80-99.
    outputs_list.append(
        Average(name='classes_80-99')([
            Crop(1, 20, 40)(outputs_submodel4),
            Crop(1, 10, 30)(outputs_submodel5)
        ]))

    # Concatenate all class predictions together.
    outputs = Concatenate(name='output')(outputs_list)
    outputs = Softmax(name='output_softmax')(outputs)

    # Create model.
    model = Model(inputs, outputs, name='cifar100_complicated_ensemble_v2')
    # Load weights, if they exist.
    load_weights(weights_path, model)

    return model
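The slice-and-average pattern above relies on the project's custom Crop layer. Below is a standalone sketch of the same idea using a plain Lambda slice in its place; the shapes and class splits are illustrative, not taken from the ensemble above:

from tensorflow.keras.layers import (Average, Concatenate, Dense, Input,
                                     Lambda, Softmax)
from tensorflow.keras.models import Model

inputs = Input(shape=(32,))
head_a = Dense(20)(inputs)  # stands in for submodel 1 (classes 0-19)
head_b = Dense(10)(inputs)  # stands in for submodel 2 (classes 10-19 only)

# Average the overlapping class range: head_a[:, 10:20] with head_b[:, 0:10].
overlap = Average()([
    Lambda(lambda t: t[:, 10:20])(head_a),
    Lambda(lambda t: t[:, 0:10])(head_b),
])
outputs = Concatenate()([Lambda(lambda t: t[:, 0:10])(head_a), overlap])
outputs = Softmax()(outputs)
model = Model(inputs, outputs)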
Example #4
 def act_blend(self, linear_input):
     # Blend two activations of the same pre-activation tensor by
     # concatenating a relu branch with a sigmoid branch.
     full_conv_relu = Activation('relu')(linear_input)
     full_conv_sigmoid = Activation('sigmoid')(linear_input)
     full_conv = Concatenate()([full_conv_relu, full_conv_sigmoid])
     return full_conv
Example #5
def MIND(user_feature_columns,
         item_feature_columns,
         num_sampled=5,
         k_max=2,
         p=1.0,
         dynamic_k=False,
         user_dnn_hidden_units=(64, 32),
         dnn_activation='relu',
         dnn_use_bn=False,
         l2_reg_dnn=0,
         l2_reg_embedding=1e-6,
         dnn_dropout=0,
         init_std=0.0001,
         seed=1024):
    """Instantiates the MIND Model architecture.

    :param user_feature_columns: An iterable containing the user's features used by the model.
    :param item_feature_columns: An iterable containing the item's features used by the model.
    :param num_sampled: int, the number of classes to randomly sample per batch.
    :param k_max: int, the max size of the user interest embedding.
    :param p: float, the parameter for adjusting the attention distribution in LabelAwareAttention.
    :param dynamic_k: bool, whether or not to use a dynamic interest number.
    :param user_dnn_hidden_units: list of positive integers (or an empty list), the layer number and units in each layer of the user tower.
    :param dnn_activation: Activation function to use in the deep net.
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the deep net.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float, to use as the initial std of embedding vectors.
    :param seed: integer, to use as random seed.
    :return: A Keras model instance.

    """

    if len(item_feature_columns) > 1:
        raise ValueError("Now MIND only supports a single item feature like item_id")
    item_feature_column = item_feature_columns[0]
    item_feature_name = item_feature_column.name
    item_vocabulary_size = item_feature_columns[0].vocabulary_size
    item_embedding_dim = item_feature_columns[0].embedding_dim
    #item_index = Input(tensor=tf.constant([list(range(item_vocabulary_size))]))

    history_feature_list = [item_feature_name]

    features = build_input_features(user_feature_columns)
    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat),
               user_feature_columns)) if user_feature_columns else []
    dense_feature_columns = list(
        filter(lambda x: isinstance(x, DenseFeat),
               user_feature_columns)) if user_feature_columns else []
    varlen_sparse_feature_columns = list(
        filter(lambda x: isinstance(x, VarLenSparseFeat),
               user_feature_columns)) if user_feature_columns else []
    history_feature_columns = []
    sparse_varlen_feature_columns = []
    history_fc_names = list(map(lambda x: "hist_" + x, history_feature_list))
    for fc in varlen_sparse_feature_columns:
        feature_name = fc.name
        if feature_name in history_fc_names:
            history_feature_columns.append(fc)
        else:
            sparse_varlen_feature_columns.append(fc)
    seq_max_len = history_feature_columns[0].maxlen
    inputs_list = list(features.values())

    embedding_matrix_dict = create_embedding_matrix(user_feature_columns +
                                                    item_feature_columns,
                                                    l2_reg_embedding,
                                                    init_std,
                                                    seed,
                                                    prefix="")

    item_features = build_input_features(item_feature_columns)

    query_emb_list = embedding_lookup(embedding_matrix_dict,
                                      item_features,
                                      item_feature_columns,
                                      history_feature_list,
                                      history_feature_list,
                                      to_list=True)
    keys_emb_list = embedding_lookup(embedding_matrix_dict,
                                     features,
                                     history_feature_columns,
                                     history_fc_names,
                                     history_fc_names,
                                     to_list=True)
    dnn_input_emb_list = embedding_lookup(embedding_matrix_dict,
                                          features,
                                          sparse_feature_columns,
                                          mask_feat_list=history_feature_list,
                                          to_list=True)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    sequence_embed_dict = varlen_embedding_lookup(
        embedding_matrix_dict, features, sparse_varlen_feature_columns)
    sequence_embed_list = get_varlen_pooling_list(
        sequence_embed_dict,
        features,
        sparse_varlen_feature_columns,
        to_list=True)

    dnn_input_emb_list += sequence_embed_list

    # keys_emb = concat_func(keys_emb_list, mask=True)
    # query_emb = concat_func(query_emb_list, mask=True)

    history_emb = PoolingLayer()(NoMask()(keys_emb_list))
    target_emb = PoolingLayer()(NoMask()(query_emb_list))

    #target_emb_size = target_emb.get_shape()[-1].value
    #max_len = history_emb.get_shape()[1].value
    hist_len = features['hist_len']

    high_capsule = CapsuleLayer(input_units=item_embedding_dim,
                                out_units=item_embedding_dim,
                                max_len=seq_max_len,
                                k_max=k_max)((history_emb, hist_len))

    if len(dnn_input_emb_list) > 0 or len(dense_value_list) > 0:
        user_other_feature = combined_dnn_input(dnn_input_emb_list,
                                                dense_value_list)

        other_feature_tile = tf.keras.layers.Lambda(
            tile_user_otherfeat, arguments={'k_max':
                                            k_max})(user_other_feature)

        user_deep_input = Concatenate()(
            [NoMask()(other_feature_tile), high_capsule])
    else:
        user_deep_input = high_capsule

    user_embeddings = DNN(user_dnn_hidden_units,
                          dnn_activation,
                          l2_reg_dnn,
                          dnn_dropout,
                          dnn_use_bn,
                          seed,
                          name="user_embedding")(user_deep_input)
    item_inputs_list = list(item_features.values())

    item_embedding_matrix = embedding_matrix_dict[item_feature_name]

    item_index = EmbeddingIndex(list(range(item_vocabulary_size)))(
        item_features[item_feature_name])

    item_embedding_weight = NoMask()(item_embedding_matrix(item_index))

    pooling_item_embedding_weight = PoolingLayer()([item_embedding_weight])

    if dynamic_k:
        user_embedding_final = LabelAwareAttention(
            k_max=k_max,
            pow_p=p,
        )((user_embeddings, target_emb, hist_len))
    else:
        user_embedding_final = LabelAwareAttention(
            k_max=k_max,
            pow_p=p,
        )((user_embeddings, target_emb))

    output = SampledSoftmaxLayer(num_sampled=num_sampled)(
        inputs=(pooling_item_embedding_weight, user_embedding_final,
                item_features[item_feature_name]))
    model = Model(inputs=inputs_list + item_inputs_list, outputs=output)

    model.__setattr__("user_input", inputs_list)
    model.__setattr__("user_embedding", user_embeddings)

    model.__setattr__("item_input", item_inputs_list)
    model.__setattr__(
        "item_embedding",
        get_item_embedding(pooling_item_embedding_weight,
                           item_features[item_feature_name]))

    return model
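A hypothetical call for this builder, in the DeepMatch style. The import paths and constructor signatures vary across versions, so treat them as assumptions; note the behavior column must be named "hist_<item_feature>" and a "hist_len" input must exist, since the function reads features['hist_len'] directly:

from deepctr.feature_column import SparseFeat, VarLenSparseFeat
from deepmatch.utils import sampledsoftmaxloss

user_feature_columns = [
    SparseFeat('user_id', vocabulary_size=100, embedding_dim=16),
    VarLenSparseFeat(SparseFeat('hist_item', vocabulary_size=1000,
                                embedding_dim=8, embedding_name='item'),
                     maxlen=50, length_name='hist_len'),
]
item_feature_columns = [SparseFeat('item', vocabulary_size=1000,
                                   embedding_dim=8)]

model = MIND(user_feature_columns, item_feature_columns,
             num_sampled=5, k_max=2)
model.compile('adam', loss=sampledsoftmaxloss)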
Example #6
def RetinaNet(backbone,
              num_classes,
              input_shape,
              inputs=None,
              norm_method='whole_image',
              location=False,
              use_imagenet=False,
              pooling=None,
              required_channels=3,
              **kwargs):
    """Constructs a retinanet model using a backbone from keras-applications.

    Args:
        backbone: string, name of backbone to use.
        num_classes: Number of classes to classify.
        input_shape: The shape of the input data.
        inputs: optional input tensor; a fresh `Input` of shape
            `input_shape` is created if it is not provided.
        norm_method: normalization method passed to the
            `ImageNormalization2D` layer.
        location: whether to concatenate a `Location2D` coordinate layer
            onto the inputs.
        use_imagenet: whether to load ImageNet-pretrained weights into the
            backbone.
        pooling: optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        required_channels: integer, the required number of channels of the
            backbone.  3 is the default for all current backbones.

    Returns:
        RetinaNet model with a backbone.
    """
    if inputs is None:
        inputs = Input(shape=input_shape)

    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    if location:
        location = Location2D(in_shape=input_shape)(inputs)
        inputs = Concatenate(axis=channel_axis)([inputs, location])

    # force the channel size for backbone input to be `required_channels`
    norm = ImageNormalization2D(norm_method=norm_method)(inputs)
    fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    fixed_input_shape = list(input_shape)
    fixed_input_shape[-1] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    backbone_dict = get_backbone(backbone, fixed_inputs, use_imagenet=use_imagenet, **model_kwargs)

    # create the full model
    return retinanet(
        inputs=inputs,
        num_classes=num_classes,
        backbone_dict=backbone_dict,
        name='{}_retinanet'.format(backbone),
        **kwargs)
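A hypothetical call. The set of valid backbone strings depends on get_backbone in the surrounding project, and the single-channel input shape below is illustrative:

model = RetinaNet(backbone='resnet50',
                  num_classes=3,
                  input_shape=(256, 256, 1),
                  norm_method='whole_image',
                  location=False,
                  required_channels=3)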
Example #7
def svhn_complicated_ensemble_v2(
        input_shape=None,
        input_tensor=None,
        n_classes=None,
        weights_path: Union[None, str] = None) -> Model:
    """
    Defines an SVHN network.

    :param n_classes: used in order to be compatible with the main script.
    :param input_shape: the input shape of the network. Can be omitted if input_tensor is used.
    :param input_tensor: the input tensor of the network. Can be omitted if input_shape is used.
    :param weights_path: a path to a trained custom network's weights.
    :return: Keras functional API Model.
    """
    outputs_list = []
    inputs = create_inputs(input_shape, input_tensor)

    # Generate Submodels.
    submodel1 = svhn_complicated_ensemble_v2_submodel1(input_shape,
                                                       input_tensor, 5,
                                                       weights_path)
    submodel2 = svhn_complicated_ensemble_v2_submodel2(input_shape,
                                                       input_tensor, 5,
                                                       weights_path)
    submodel3 = svhn_complicated_ensemble_v2_submodel3(input_shape,
                                                       input_tensor, 5,
                                                       weights_path)
    submodel4 = svhn_complicated_ensemble_v2_submodel4(input_shape,
                                                       input_tensor, 5,
                                                       weights_path)
    submodel5 = svhn_complicated_ensemble_v2_submodel5(input_shape,
                                                       input_tensor, 5,
                                                       weights_path)

    # Get their outputs.
    outputs_submodel1 = submodel1(inputs)
    outputs_submodel2 = submodel2(inputs)
    outputs_submodel3 = submodel3(inputs)
    outputs_submodel4 = submodel4(inputs)
    outputs_submodel5 = submodel5(inputs)

    # Trim submodel 2-5 outputs: drop the first (extra) logit of each.
    outputs_submodel2 = Crop(1, 1,
                             outputs_submodel2.shape[1])(outputs_submodel2)
    outputs_submodel3 = Crop(1, 1,
                             outputs_submodel3.shape[1])(outputs_submodel3)
    outputs_submodel4 = Crop(1, 1,
                             outputs_submodel4.shape[1])(outputs_submodel4)
    outputs_submodel5 = Crop(1, 1,
                             outputs_submodel5.shape[1])(outputs_submodel5)

    # Create the complicated outputs.
    # Class 0.
    outputs_list.append(
        Average(name='class_0')([
            Crop(1, 0, 1)(outputs_submodel1),
            Crop(1, 1, 2)(outputs_submodel5)
        ]))

    # Classes 1, 2, 3.
    outputs_list.append(
        Average(name='classes_1_2_3')([
            Crop(1, 1, 4)(outputs_submodel1),
            Crop(1, 0, 3)(outputs_submodel2)
        ]))

    # Class 4.
    outputs_list.append(
        Average(name='class_4')([
            Crop(1, 3, 4)(outputs_submodel2),
            Crop(1, 0, 1)(outputs_submodel3)
        ]))

    # Class 5.
    outputs_list.append(
        Average(name='class_5')([
            Crop(1, 1, 2)(outputs_submodel3),
            Crop(1, 0, 1)(outputs_submodel5)
        ]))

    # Classes 6, 7.
    outputs_list.append(
        Average(name='classes_6_7')([
            Crop(1, 2, 4)(outputs_submodel3),
            Crop(1, 0, 2)(outputs_submodel4)
        ]))

    # Classes 8, 9.
    outputs_list.append(
        Average(name='classes_8_9')([
            Crop(1, 2, 4)(outputs_submodel4),
            Crop(1, 1, 3)(outputs_submodel5)
        ]))

    # Concatenate all class predictions together.
    outputs = Concatenate(name='output')(outputs_list)
    outputs = Softmax(name='output_softmax')(outputs)

    # Create model.
    model = Model(inputs, outputs, name='svhn_complicated_ensemble_v2')
    # Load weights, if they exist.
    load_weights(weights_path, model)

    return model
Example #8
 def Concatenate(self, input, axis=-1):
     # Thin wrapper around the Keras Concatenate layer; inside the method
     # body the name resolves to the global layer class, not this method.
     x = Concatenate(axis=axis)(input)
     return x
Example #9
def PT_trans(name, P_N, PT_F, T, T_F, H, W, isPT_F):
    # Only the POI tensor needs a permute here; the time tensor is
    # permuted inside T_trans.
    poi_in = Input(shape=(P_N, H, W))
    poi_in_p = Permute((2, 3, 1))(poi_in)
    # T slots/day + 7 days/week
    time_in = Input(shape=(T + 7, H, W))

    # One T_trans branch per POI channel, concatenated along the channel
    # axis (handles any P_N >= 1).
    T_x_list = [T_trans(T, T_F, H, W)(time_in) for _ in range(P_N)]
    T_x = T_x_list[0] if P_N == 1 else Concatenate(axis=3)(T_x_list)

    # Gate the permuted POI channels with the time transforms.
    poi_time = Multiply()([poi_in_p, T_x])
    if isPT_F:
        poi_time = Conv2D(filters=PT_F, kernel_size=(1, 1),
                          padding="same")(poi_time)
        print('PT_F = YES')
    else:
        print('PT_F = NO')
    PT_model = Model(inputs=[poi_in, time_in], outputs=poi_time, name=name)

    return PT_model
Example #10
def DSIN(feature_dim_dict,
         sess_feature_list,
         embedding_size=8,
         sess_max_count=5,
         sess_len_max=10,
         att_embedding_size=1,
         att_head_num=8,
         dnn_hidden_units=(200, 80),
         dnn_activation='sigmoid',
         l2_reg_dnn=0,
         l2_reg_embedding=1e-6,
         task='binary',
         dnn_dropout=0,
         init_std=0.0001,
         seed=1024,
         bias_encoding=False):
    check_feature_config_dict(feature_dim_dict)

    sparse_input, dense_input, user_behavior_input_dict, _, user_sess_length = get_input(
        feature_dim_dict, sess_feature_list, sess_max_count, sess_len_max)

    sparse_embedding_dict = {
        feat.name:
        Embedding(feat.dimension,
                  embedding_size,
                  embeddings_initializer=RandomNormal(mean=0.0,
                                                      stddev=init_std,
                                                      seed=seed),
                  embeddings_regularizer=l2(l2_reg_embedding),
                  name='sparse_emb_' + str(i) + '-' + feat.name,
                  mask_zero=(feat.name in sess_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                            sparse_input,
                                            feature_dim_dict["sparse"],
                                            sess_feature_list,
                                            sess_feature_list)

    query_emb = concat_fun(query_emb_list)

    deep_input_emb_list = get_embedding_vec_list(
        sparse_embedding_dict,
        sparse_input,
        feature_dim_dict["sparse"],
        mask_feat_list=sess_feature_list)
    deep_input_emb = concat_fun(deep_input_emb_list)
    deep_input_emb = Flatten()(NoMask()(deep_input_emb))

    tr_input = sess_interest_division(sparse_embedding_dict,
                                      user_behavior_input_dict,
                                      feature_dim_dict['sparse'],
                                      sess_feature_list,
                                      sess_max_count,
                                      bias_encoding=bias_encoding)

    Self_Attention = Transformer(att_embedding_size,
                                 att_head_num,
                                 dropout_rate=0,
                                 use_layer_norm=False,
                                 use_positional_encoding=(not bias_encoding),
                                 seed=seed,
                                 supports_masking=True,
                                 blinding=True)
    sess_fea = sess_interest_extractor(tr_input, sess_max_count,
                                       Self_Attention)

    interest_attention_layer = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16),
        weight_normalization=True,
        supports_masking=False)([query_emb, sess_fea, user_sess_length])

    lstm_outputs = BiLSTM(
        len(sess_feature_list) * embedding_size,
        layers=2,
        res_layers=0,
        dropout_rate=0.2,
    )(sess_fea)
    lstm_attention_layer = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16),
        weight_normalization=True)([query_emb, lstm_outputs, user_sess_length])

    deep_input_emb = Concatenate()([
        deep_input_emb,
        Flatten()(interest_attention_layer),
        Flatten()(lstm_attention_layer)
    ])
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()([deep_input_emb] +
                                       list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                 False, seed)(deep_input_emb)
    output = Dense(1, use_bias=False, activation=None)(output)
    output = PredictionLayer(task)(output)

    sess_input_list = []
    # sess_input_length_list = []
    for i in range(sess_max_count):
        sess_name = "sess_" + str(i)
        sess_input_list.extend(
            get_inputs_list([user_behavior_input_dict[sess_name]]))
        # sess_input_length_list.append(user_behavior_length_dict[sess_name])

    model_input_list = get_inputs_list(
        [sparse_input, dense_input]) + sess_input_list + [user_sess_length]

    model = Model(inputs=model_input_list, outputs=output)

    return model
Example #11
def rnn_train(space):  # functional
    ''' train RNN based on training / validation set -> give predictions of Y '''
    params = space.copy()

    lookback = 20  # lookback = 5Y * 4Q = 20Q
    x_fields = 10  # lgbm top15 features -> 10 features in rnn

    inputs_loss_weight = 0.1  # loss weights for individual outputs from each rnn model
    dense_loss_weight = 2  # loss weights for final output
    loss_weights = [inputs_loss_weight] * x_fields + [
        dense_loss_weight
    ]  # loss weights for training

    # use the configured objective (e.g. MAE) for every GRU output and the final output
    loss = [args.objective] * (x_fields + 1)
    metrics = [args.objective] * (x_fields + 1)

    input_img = Input(shape=(lookback, x_fields))
    outputs = []
    states = []

    for col in range(x_fields):  # build one GRU branch per feature

        g_1 = K.expand_dims(
            input_img[:, :, col], axis=2
        )  # add dimension to certain feature: shape = (samples, 20, 1)

        for i in range(params['num_gru_layer']):
            temp_nodes = int(
                min(params['gru_nodes'] * (2**(params['gru_nodes_mult'] * i)),
                    8))
            extra = dict(return_sequences=True)

            if not args.bi:
                if i == params['num_gru_layer'] - 1:
                    extra = dict(return_sequences=False)
                    g_state = GRU(temp_nodes, **extra)(g_1)  # forecast state
                elif i == 0:
                    g_1 = GRU(temp_nodes, **extra)(g_1)
                else:
                    g_1 = GRU(temp_nodes,
                              dropout=params['gru_dropout'],
                              **extra)(g_1)

            else:  # try bidirectional one
                if i == params['num_gru_layer'] - 1:
                    extra = dict(return_sequences=False)
                    g_state = GRU(temp_nodes, **extra)(g_1)  # forecast state
                elif i == 0:
                    g_1 = Bidirectional(GRU(temp_nodes, **extra))(g_1)
                else:
                    g_1 = Bidirectional(
                        GRU(temp_nodes, dropout=params['gru_dropout'],
                            **extra))(g_1)

        g_output = Dense(1)(g_state)

        states.append(g_state)
        outputs.append(g_output)

    f_x = Concatenate(axis=1)(states)
    for i in range(
            params['num_dense_layer']):  # for second or third dense layers
        f_x = Dense(10)(f_x)

    f_x = Dense(1, name='final_dense')(f_x)

    outputs.append(f_x)
    model = Model(
        inputs=input_img,
        outputs=outputs)  # outputs = 10 forecast states + final forecast

    callbacks_list = [
        callbacks.ReduceLROnPlateau(monitor='val_loss',
                                    factor=0.1,
                                    patience=10),
        callbacks.EarlyStopping(monitor='val_loss', patience=10, mode='auto')
    ]  # add callbacks
    lr_val = 10**-int(params['learning_rate'])
    adam = optimizers.Adam(lr=lr_val)
    model.compile(adam, loss=loss, metrics=metrics, loss_weights=loss_weights)
    model.summary()

    history = model.fit(X_train, [Y_train] * (x_fields + 1),
                        epochs=50,
                        batch_size=params['batch_size'],
                        validation_data=(X_valid, [Y_valid] * (x_fields + 1)),
                        verbose=1,
                        callbacks=callbacks_list)

    Y_test_pred = model.predict(X_test)[-1]  # final dense predictions
    Y_train_pred = model.predict(X_train)[-1]
    Y_valid_pred = model.predict(X_valid)[-1]

    return Y_test_pred, Y_train_pred, Y_valid_pred, history
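For reference, a hypothetical `space` dict covering every key this function reads; the values are illustrative, not tuned:

space = {
    'num_gru_layer': 2,      # GRU stack depth per feature
    'gru_nodes': 4,          # base GRU width, scaled by gru_nodes_mult
    'gru_nodes_mult': 1,
    'gru_dropout': 0.2,
    'num_dense_layer': 1,    # extra Dense(10) layers before final_dense
    'learning_rate': 3,      # used as 10 ** -3
    'batch_size': 64,
}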
Example #12
def DIN(
    feature_dim_dict,
    seq_feature_list,
    embedding_size=8,
    hist_len_max=16,
    use_din=True,
    use_bn=False,
    hidden_size=[200, 80],
    activation='relu',
    att_hidden_size=[80, 40],
    att_activation='sigmoid',
    att_weight_normalization=True,
    l2_reg_deep=0,
    l2_reg_embedding=1e-5,
    final_activation='sigmoid',
    keep_prob=1,
    init_std=0.0001,
    seed=1024,
):
    """Instantiates the Deep Interest Network architecture.

    :param feature_dim_dict: dict, to indicate the sparse fields (**now only support sparse features**), like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param seq_feature_list: list, to indicate the sequence sparse fields (**now only support sparse features**); must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, sparse feature embedding size.
    :param hist_len_max: positive int, to indicate the max length of the seq input.
    :param use_din: bool, whether to use DIN pooling or not. If set to ``False``, use **sum pooling**.
    :param use_bn: bool. Whether to use BatchNormalization before activation in the deep net.
    :param hidden_size: list of positive integers (or an empty list), the layer number and units in each layer of the deep net.
    :param activation: Activation function to use in the deep net.
    :param att_hidden_size: list of positive integers, the layer number and units in each layer of the attention net.
    :param att_activation: Activation function to use in the attention net.
    :param att_weight_normalization: bool. Whether to normalize the attention score of the local activation unit.
    :param l2_reg_deep: float. L2 regularizer strength applied to the deep net.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vectors.
    :param final_activation: str, output activation, usually ``'sigmoid'`` or ``'linear'``.
    :param keep_prob: float in (0,1]. keep_prob used in the deep net.
    :param init_std: float, to use as the initial std of embedding vectors.
    :param seed: integer, to use as random seed.
    :return: A Keras model instance.

    """
    if not isinstance(feature_dim_dict, dict) or \
            "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict:
        raise ValueError(
            "feature_dim_dict must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}"
        )
    if len(feature_dim_dict['dense']) > 0:
        raise ValueError('Now DIN only support sparse input')
    sparse_input, user_behavior_input, user_behavior_length = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)
    sparse_embedding_dict = {
        feat: Embedding(feature_dim_dict["sparse"][feat],
                        embedding_size,
                        embeddings_initializer=RandomNormal(mean=0.0,
                                                            stddev=init_std,
                                                            seed=seed),
                        embeddings_regularizer=l2(l2_reg_embedding),
                        name='sparse_emb_' + str(i) + '-' + feat)
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }
    query_emb_list = [
        sparse_embedding_dict[feat](sparse_input[feat])
        for feat in seq_feature_list
    ]
    keys_emb_list = [
        sparse_embedding_dict[feat](user_behavior_input[feat])
        for feat in seq_feature_list
    ]
    deep_input_emb_list = [
        sparse_embedding_dict[feat](sparse_input[feat])
        for feat in feature_dim_dict["sparse"]
    ]

    query_emb = Concatenate()(
        query_emb_list) if len(query_emb_list) > 1 else query_emb_list[0]
    keys_emb = Concatenate()(
        keys_emb_list) if len(keys_emb_list) > 1 else keys_emb_list[0]
    deep_input_emb = Concatenate()(deep_input_emb_list) if len(
        deep_input_emb_list) > 1 else deep_input_emb_list[0]

    if use_din:
        hist = AttentionSequencePoolingLayer(
            att_hidden_size,
            att_activation,
            weight_normalization=att_weight_normalization)(
                [query_emb, keys_emb, user_behavior_length])
    else:
        hist = SequencePoolingLayer(hist_len_max,
                                    'sum')([keys_emb, user_behavior_length])

    deep_input_emb = Concatenate()([deep_input_emb, hist])
    output = MLP(hidden_size, activation, l2_reg_deep, keep_prob, use_bn,
                 seed)(deep_input_emb)
    output = Dense(1, final_activation)(output)
    output = Reshape([1])(output)
    model_input_list = list(sparse_input.values()) + list(
        user_behavior_input.values()) + [user_behavior_length]

    model = Model(inputs=model_input_list, outputs=output)
    return model
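A hypothetical call using the legacy `feature_dim_dict` interface documented above (sparse-only, as the function enforces; names and sizes are illustrative):

feature_dim_dict = {'sparse': {'user': 3, 'gender': 2, 'item': 100},
                    'dense': []}
model = DIN(feature_dim_dict, ['item'], embedding_size=8, hist_len_max=16)
model.compile('adam', 'binary_crossentropy')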
Example #13
def Functional(fields=None,
               variables=None,
               hidden_layers=None,
               activation="tanh",
               output_activation="linear",
               res_net=False,
               kernel_initializer=None,
               bias_initializer=None,
               kernel_regularizer=None,
               bias_regularizer=None,
               trainable=True,
               **kwargs):
    # prepare hidden layers.
    if hidden_layers is None:
        hidden_layers = []
    else:
        hidden_layers = to_list(hidden_layers)
    # prepare kernel initializers.
    activations, def_biasinit, def_kerinit = \
        prepare_default_activations_and_initializers(
            len(hidden_layers) * [activation] + [output_activation])
    if kernel_initializer is None:
        kernel_initializer = def_kerinit
    elif isinstance(kernel_initializer, (float, int)):
        kernel_initializer = default_weight_initializer(
            len(hidden_layers) * [activation] + [output_activation],
            'constant',
            scale=kernel_initializer)
    else:
        kernel_initializer = [
            kernel_initializer
            for _ in len(hidden_layers) * [activation] + [output_activation]
        ]
    # prepare bias initializers.
    if bias_initializer is None:
        bias_initializer = def_biasinit
    elif isinstance(bias_initializer, (float, int)):
        bias_initializer = default_weight_initializer(
            len(hidden_layers) * [activation] + [output_activation],
            'constant',
            scale=bias_initializer)
    else:
        bias_initializer = [
            bias_initializer
            for _ in len(hidden_layers) * [activation] + [output_activation]
        ]
    # prepare regularizers.
    kernel_regularizer = default_regularizer(kernel_regularizer)
    bias_regularizer = default_regularizer(bias_regularizer)
    # prepares fields.
    fields = to_list(fields)
    if all([isinstance(fld, str) for fld in fields]):
        output_fields = [
            Field(
                name=fld,
                kernel_initializer=kernel_initializer[-1],
                bias_initializer=bias_initializer[-1],
                kernel_regularizer=kernel_regularizer,
                bias_regularizer=bias_regularizer,
                trainable=trainable,
            ) for fld in fields
        ]
    elif all([validations.is_field(fld) for fld in fields]):
        output_fields = fields
    else:
        raise TypeError('Please provide a "list" of field names of' +
                        ' type "String" or "Field" objects.')
    # prepare inputs/outputs/layers.
    inputs = []
    layers = []
    variables = to_list(variables)
    if all([isinstance(var, MLPFunctional) for var in variables]):
        for var in variables:
            inputs += var.outputs
        # for var in variables:
        #     for lay in var.layers:
        #         layers.append(lay)
    else:
        raise TypeError(
            "Input error: Please provide a `list` of `sn.Variable`s. \n"
            "Provided - {}".format(variables))

    # Input layers.
    if len(inputs) == 1:
        net_input = inputs[0]
    else:
        layer = Concatenate(name=graph_unique_name('concat'))
        net_input = layer(inputs)

    # Define the output network.
    net = [net_input]

    # define the ResNet networks.
    if res_net is True:
        res_layers = []
        res_outputs = []
        for rl in ["U", "V", "H"]:
            layers.append(
                Dense(
                    hidden_layers[0],
                    kernel_initializer=kernel_initializer[0],
                    bias_initializer=bias_initializer[0],
                    kernel_regularizer=kernel_regularizer,
                    bias_regularizer=bias_regularizer,
                    trainable=trainable,
                    name=graph_unique_name("DRes" + rl +
                                           "{:d}b".format(hidden_layers[0]))))
            res_output = layers[-1](net_input)
            # Apply the activation.
            if activations[0].activation.__name__ != 'linear':
                layers.append(activations[0])
                res_outputs.append(layers[-1](res_output))
        net[-1] = res_outputs[-1]

    for nLay, nNeuron in enumerate(hidden_layers):
        # Add the layer.
        layer = Dense(nNeuron,
                      kernel_initializer=kernel_initializer[nLay],
                      bias_initializer=bias_initializer[nLay],
                      kernel_regularizer=kernel_regularizer,
                      bias_regularizer=bias_regularizer,
                      trainable=trainable,
                      name=graph_unique_name("D{:d}b".format(nNeuron)))
        layers.append(layer)
        net[-1] = layer(net[-1])
        # Apply the activation.
        if activations[nLay].activation.__name__ != 'linear':
            layer = activations[nLay]
            layers.append(layer)
            net[-1] = layer(net[-1])
        # Add the resnet layer
        if res_net is True:
            layer = Lambda(lambda xs: (1 - xs[0]) * xs[1] + xs[0] * xs[2],
                           name=graph_unique_name("ResLayer"))
            net[-1] = layer([net[-1]] + res_outputs[:2])

    # Assign to the output variable
    if len(net) == 1:
        net_output = net[0]
    else:
        # Multi-segment (enrichment) networks are a legacy path that has
        # not been ported to this builder.
        raise ValueError("Legacy for Enrichment: Must be updated.")

    # Define the final outputs of each network
    functionals = []
    for out in output_fields:
        last_layers = [out]
        last_output = out(net_output)
        # add the activation on the output.
        if activations[-1].activation.__name__ != 'linear':
            layer = activations[-1]
            last_layers.append(layer)
            last_output = layer(last_output)
        # Construct functionals
        functionals.append(
            MLPFunctional(inputs, [last_output], layers + last_layers))

    return unpack_singleton(functionals)
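This builder follows the SciANN `Functional` API. A hypothetical usage; the `sciann` import path, and the fact that `sn.Variable` objects satisfy the `MLPFunctional` type check above, are assumptions:

import sciann as sn

x = sn.Variable('x')
y = sn.Variable('y')
# Two output fields sharing one 3-layer tanh body; the builder returns
# one functional per field via unpack_singleton.
u, v = Functional(['u', 'v'], [x, y],
                  hidden_layers=[20, 20, 20],
                  activation='tanh')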
Example #14
def _main(args):
    config_path = os.path.expanduser(args.config_path)
    weights_path = os.path.expanduser(args.weights_path)
    assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
        config_path)
    assert weights_path.endswith(
        '.weights'), '{} is not a .weights file'.format(weights_path)

    output_path = os.path.expanduser(args.output_path)
    assert output_path.endswith(
        '.h5'), 'output path {} is not a .h5 file'.format(output_path)
    output_root = os.path.splitext(output_path)[0]

    # Load weights and config.
    print('Loading weights.')
    weights_file = open(weights_path, 'rb')
    major, minor, revision = np.ndarray(
        shape=(3, ), dtype='int32', buffer=weights_file.read(12))
    if (major * 10 + minor) >= 2 and major < 1000 and minor < 1000:
        seen = np.ndarray(shape=(1,), dtype='int64', buffer=weights_file.read(8))
    else:
        seen = np.ndarray(shape=(1,), dtype='int32', buffer=weights_file.read(4))
    print('Weights Header: ', major, minor, revision, seen)

    print('Parsing Darknet config.')
    unique_config_file = unique_config_sections(config_path)
    cfg_parser = configparser.ConfigParser()
    cfg_parser.read_file(unique_config_file)

    print('Creating Keras model.')
    input_layer = Input(shape=(None, None, 3))
    prev_layer = input_layer
    all_layers = []

    weight_decay = float(cfg_parser['net_0']['decay']) \
        if 'net_0' in cfg_parser.sections() else 5e-4
    count = 0
    out_index = []
    for section in cfg_parser.sections():
        print('Parsing section {}'.format(section))
        if section.startswith('convolutional'):
            filters = int(cfg_parser[section]['filters'])
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            pad = int(cfg_parser[section]['pad'])
            activation = cfg_parser[section]['activation']
            batch_normalize = 'batch_normalize' in cfg_parser[section]

            padding = 'same' if pad == 1 and stride == 1 else 'valid'

            # Setting weights.
            # Darknet serializes convolutional weights as:
            # [bias/beta, [gamma, mean, variance], conv_weights]
            prev_layer_shape = K.int_shape(prev_layer)

            weights_shape = (size, size, prev_layer_shape[-1], filters)
            darknet_w_shape = (filters, weights_shape[2], size, size)
            weights_size = np.prod(weights_shape)

            print('conv2d', 'bn' if batch_normalize else '  ',
                  activation, weights_shape)

            conv_bias = np.ndarray(
                shape=(filters, ),
                dtype='float32',
                buffer=weights_file.read(filters * 4))
            count += filters

            if batch_normalize:
                bn_weights = np.ndarray(
                    shape=(3, filters),
                    dtype='float32',
                    buffer=weights_file.read(filters * 12))
                count += 3 * filters

                bn_weight_list = [
                    bn_weights[0],  # scale gamma
                    conv_bias,  # shift beta
                    bn_weights[1],  # running mean
                    bn_weights[2]  # running var
                ]

            conv_weights = np.ndarray(
                shape=darknet_w_shape,
                dtype='float32',
                buffer=weights_file.read(weights_size * 4))
            count += weights_size

            # DarkNet conv_weights are serialized Caffe-style:
            # (out_dim, in_dim, height, width)
            # We would like to set these to Tensorflow order:
            # (height, width, in_dim, out_dim)
            conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
            conv_weights = [conv_weights] if batch_normalize else [
                conv_weights, conv_bias
            ]

            # Handle activation.
            act_fn = None
            if activation == 'leaky':
                pass  # Add advanced activation later.
            elif activation != 'linear':
                raise ValueError(
                    'Unknown activation function `{}` in section {}'.format(
                        activation, section))

            # Create Conv2D layer
            if stride > 1:
                # Darknet uses left and top padding instead of 'same' mode
                prev_layer = ZeroPadding2D(((1, 0), (1, 0)))(prev_layer)
            conv_layer = (Conv2D(
                filters, (size, size),
                strides=(stride, stride),
                kernel_regularizer=l2(weight_decay),
                use_bias=not batch_normalize,
                weights=conv_weights,
                activation=act_fn,
                padding=padding))(prev_layer)

            if batch_normalize:
                conv_layer = (BatchNormalization(
                    weights=bn_weight_list))(conv_layer)
            prev_layer = conv_layer

            if activation == 'linear':
                all_layers.append(prev_layer)
            elif activation == 'leaky':
                act_layer = LeakyReLU(alpha=0.1)(prev_layer)
                prev_layer = act_layer
                all_layers.append(act_layer)

        elif section.startswith('route'):
            ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
            layers = [all_layers[i] for i in ids]
            if len(layers) > 1:
                print('Concatenating route layers:', layers)
                concatenate_layer = Concatenate()(layers)
                all_layers.append(concatenate_layer)
                prev_layer = concatenate_layer
            else:
                skip_layer = layers[0]  # only one layer to route
                all_layers.append(skip_layer)
                prev_layer = skip_layer

        elif section.startswith('maxpool'):
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            all_layers.append(
                MaxPooling2D(
                    pool_size=(size, size),
                    strides=(stride, stride),
                    padding='same')(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('shortcut'):
            index = int(cfg_parser[section]['from'])
            activation = cfg_parser[section]['activation']
            assert activation == 'linear', 'Only linear activation supported.'
            all_layers.append(Add()([all_layers[index], prev_layer]))
            prev_layer = all_layers[-1]

        elif section.startswith('upsample'):
            stride = int(cfg_parser[section]['stride'])
            assert stride == 2, 'Only stride=2 supported.'
            all_layers.append(UpSampling2D(stride)(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('yolo'):
            # The [yolo] head has no Keras layer: record the previous layer
            # as a model output and keep a placeholder in all_layers.
            out_index.append(len(all_layers) - 1)
            all_layers.append(None)
            prev_layer = all_layers[-1]

        elif section.startswith('net'):
            pass

        else:
            raise ValueError(
                'Unsupported section header type: {}'.format(section))

    # Create and save model.
    if len(out_index) == 0:
        out_index.append(len(all_layers) - 1)
    model = Model(inputs=input_layer, outputs=[all_layers[i] for i in out_index])
    print(model.summary())
    if args.weights_only:
        model.save_weights(output_path)
        print('Saved Keras weights to {}'.format(output_path))
    else:
        model.save(output_path)
        print('Saved Keras model to {}'.format(output_path))

    # Check to see if all weights have been read.
    remaining_weights = len(weights_file.read()) // 4  # 4 bytes per float32
    weights_file.close()
    print('Read {} of {} from Darknet weights.'.format(count, count +
                                                       remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    if args.plot_model:
        plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
        print('Saved model plot to {}.png'.format(output_root))
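A hedged sketch of the CLI wiring that would drive _main above: the positional arguments and the two flags mirror the attributes the function reads (config_path, weights_path, output_path, weights_only, plot_model); everything else about the interface is an assumption.

import argparse

# Hypothetical argument parser for the converter; only the attribute names
# are grounded in _main above.
parser = argparse.ArgumentParser(description='Convert Darknet YOLO weights to Keras.')
parser.add_argument('config_path', help='Path to the Darknet .cfg file.')
parser.add_argument('weights_path', help='Path to the Darknet .weights file.')
parser.add_argument('output_path', help='Path for the output Keras .h5 file.')
parser.add_argument('-w', '--weights_only', action='store_true',
                    help='Save only the model weights, not the full model.')
parser.add_argument('-p', '--plot_model', action='store_true',
                    help='Also save a plot of the generated model.')

if __name__ == '__main__':
    _main(parser.parse_args())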
Example #15
    def build_explanation_model(self,
                                input_dim,
                                output_dim,
                                loss,
                                downsample_factors=(1, )):
        num_indices, num_channels, steps, downsampling_factor =\
            MaskingUtil.get_input_constants(input_dim, downsample_factors)

        if downsampling_factor != 1 and num_indices is None:
            raise ValueError(
                "Attribution downsampling is not supported for variable length inputs. "
                "Please pad your data samples to the same size to use downsampling."
            )

        input_shape = (input_dim, ) if not isinstance(
            input_dim, collections.abc.Sequence) else input_dim
        input_layer = Input(shape=input_shape)
        last_layer = self.build(input_layer)

        if num_indices is None:
            last_layer = Dense(1, activation="linear")(last_layer)
            last_layer = Flatten()(last_layer)  # None * None outputs
            last_layer = Lambda(
                K.softmax, output_shape=K.int_shape(last_layer))(last_layer)
        else:
            last_layer = Flatten()(last_layer)
            last_layer = Dense(num_indices, activation="softmax")(last_layer)

        # Prepare extra inputs for causal loss.
        all_auxiliary_outputs = Input(shape=(output_dim, ), name="all")
        all_but_one_auxiliary_outputs_input = Input(shape=(num_indices,
                                                           output_dim),
                                                    name="all_but_one")

        if num_indices is not None:
            all_but_one_auxiliary_outputs = Lambda(lambda x: tf.unstack(
                x, axis=1))(all_but_one_auxiliary_outputs_input)
            if K.int_shape(all_but_one_auxiliary_outputs_input)[1] == 1:
                all_but_one_auxiliary_outputs = [all_but_one_auxiliary_outputs]
        else:
            all_but_one_auxiliary_outputs = all_but_one_auxiliary_outputs_input

        all_but_one_auxiliary_outputs = Concatenate()(
            all_but_one_auxiliary_outputs)

        causal_loss_fun = CausalLoss(num_indices=num_indices,
                                     loss_function=loss)

        if downsampling_factor != 1:
            last_layer = Reshape(tuple(steps) + (1, ))(last_layer)

            if len(steps) == 1:
                # Add a dummy dimension to enable usage of __resize_images__.
                last_layer = Reshape(tuple(steps) + (1, 1))(last_layer)
                last_layer = Lambda(lambda x: resize_images(
                    x,
                    height_factor=downsample_factors[0],
                    width_factor=1,
                    data_format="channels_last"))(last_layer)
            elif len(steps) == 2:
                last_layer = Lambda(lambda x: resize_images(
                    x,
                    height_factor=downsample_factors[0],
                    width_factor=downsample_factors[1],
                    data_format="channels_last"))(last_layer)
            elif len(steps) == 3:
                last_layer = Lambda(lambda x: resize_volumes(
                    x,
                    depth_factor=downsample_factors[0],
                    height_factor=downsample_factors[1],
                    width_factor=downsample_factors[2],
                    data_format="channels_last"))(last_layer)
            else:
                raise ValueError(
                    "Attribution maps of larger dimensionality than 3D data are not currently supported. "
                    "Requested output dim was: {}.".format(len(steps)))

            attribution_shape = Validation.get_attribution_shape_from_input_shape(
                num_samples=1, input_dim=input_dim)[1:]
            collapsed_attribution_shape = (int(np.prod(attribution_shape)), )
            last_layer = Reshape(collapsed_attribution_shape)(last_layer)

            # Re-normalise to sum = 1 (after resizing, the map sums to the downsampling factor).
            last_layer = Lambda(lambda x: x / float(downsampling_factor))(
                last_layer)

        final_layer = Concatenate()(
            [last_layer, all_but_one_auxiliary_outputs, all_auxiliary_outputs])

        model = Model(inputs=[
            input_layer, all_auxiliary_outputs,
            all_but_one_auxiliary_outputs_input
        ],
                      outputs=final_layer)

        model = self.compile_model(model,
                                   main_losses=causal_loss_fun,
                                   learning_rate=self.learning_rate,
                                   optimizer=self.optimizer)

        prediction_model = Model(input_layer, last_layer)
        return model, prediction_model
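A hedged usage sketch: build_explanation_model is a method, so this assumes some concrete subclass (the name MLPModelBuilder and its constructor arguments are hypothetical) that provides build(), compile_model(), learning_rate and optimizer; only the method signature comes from the snippet above.

builder = MLPModelBuilder(num_layers=2, num_units=64, learning_rate=1e-3)  # hypothetical subclass
explanation_model, prediction_model = builder.build_explanation_model(
    input_dim=(28, 28, 1),            # fixed-size inputs, so num_indices is known
    output_dim=10,                    # e.g. a 10-class classifier's outputs
    loss='categorical_crossentropy',
    downsample_factors=(2, 2))        # coarsen the attribution map 2x per axis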
Example #16
def DeepSTN(
    H=21,
    W=12,
    channel=2,  # H-map_height W-map_width channel-map_channel
    c=3,
    p=4,
    t=4,  # c-closeness p-period t-trend
    pre_F=64,
    conv_F=64,
    R_N=2,
    # pre_F-prepare_conv_feature conv_F-resnet_conv_feature R_N-resnet_number
    is_plus=True,  # use ResPlus or normal convolution
    plus=8,
    rate=2,  # rate-pooling_rate
    is_pt=True,  # use PoI and Time or not
    P_N=6,
    T_F=28,
    PT_F=6,
    T=24,
    # P_N-poi_number T_F-time_feature PT_F-poi_time_feature T-T_times/day
    drop=0,
    is_summary=True,  # show detail
    lr=0.0002,
    kernel1=1,
    # kernel1 decides whether early-fusion uses conv_unit0 or conv_unit1, 1 recommended
    isPT_F=1
):  # isPT_F decides whether PT_model uses one more Conv after multiplying PoI and Time, 1 recommended

    all_channel = channel * (c + p + t)

    cut0 = 0
    cut1 = cut0 + channel * c
    cut2 = cut1 + channel * p
    cut3 = cut2 + channel * t

    cpt_input = Input(shape=(all_channel, H, W))
    cpt_input_p = Permute((2, 3, 1))(cpt_input)

    c_input = Lambda(cpt_slice, arguments={
        'h1': cut0,
        'h2': cut1
    })(cpt_input_p)
    p_input = Lambda(cpt_slice, arguments={
        'h1': cut1,
        'h2': cut2
    })(cpt_input_p)
    t_input = Lambda(cpt_slice, arguments={
        'h1': cut2,
        'h2': cut3
    })(cpt_input_p)

    c_out1 = Conv2D(filters=pre_F, kernel_size=(1, 1), padding="same")(c_input)
    p_out1 = Conv2D(filters=pre_F, kernel_size=(1, 1), padding="same")(p_input)
    t_out1 = Conv2D(filters=pre_F, kernel_size=(1, 1), padding="same")(t_input)

    if is_pt:
        poi_in = Input(shape=(P_N, H, W))
        # poi_in_p = Permute((2, 3, 1))(poi_in)
        # T_times/day + 7days/week
        time_in = Input(shape=(T + 7, H, W))
        # time_in_p = Permute((2, 3, 1))(time_in)

        PT_model = PT_trans('PT_trans', P_N, PT_F, T, T_F, H, W, isPT_F)

        # poi_time = PT_model([poi_in_p, time_in_p])
        poi_time = PT_model([poi_in, time_in])
        # TODO: is a permutation needed here? Apparently not, since PT_model permutes internally.

        cpt_con1 = Concatenate(axis=3)([c_out1, p_out1, t_out1, poi_time])
        if kernel1:
            cpt = conv_unit1(pre_F * 3 + PT_F * isPT_F + P_N * (not isPT_F),
                             conv_F, drop, H, W)(cpt_con1)
        else:
            cpt = conv_unit0(pre_F * 3 + PT_F * isPT_F + P_N * (not isPT_F),
                             conv_F, drop, H, W)(cpt_con1)

    else:
        cpt_con1 = Concatenate(axis=3)([c_out1, p_out1, t_out1])
        if kernel1:
            cpt = conv_unit1(pre_F * 3, conv_F, drop, H, W)(cpt_con1)
        else:
            cpt = conv_unit0(pre_F * 3, conv_F, drop, H, W)(cpt_con1)

    if is_plus:
        for i in range(R_N):
            cpt = Res_plus('Res_plus_' + str(i + 1), conv_F, plus, rate, drop,
                           H, W)(cpt)

    else:
        for i in range(R_N):
            cpt = Res_normal('Res_normal_' + str(i + 1), conv_F, drop, H,
                             W)(cpt)

    cpt_conv2 = Activation('relu')(cpt)
    cpt_out2 = BatchNormalization()(cpt_conv2)
    cpt_conv1 = Dropout(drop)(cpt_out2)
    cpt_conv1 = Conv2D(filters=channel, kernel_size=(1, 1),
                       padding="same")(cpt_conv1)
    cpt_out1 = Activation('tanh')(cpt_conv1)

    if is_pt:
        DeepSTN_model = Model(inputs=[cpt_input, poi_in, time_in],
                              outputs=cpt_out1)
    else:
        DeepSTN_model = Model(inputs=cpt_input, outputs=cpt_out1)

    DeepSTN_model.compile(loss='mse',
                          optimizer=Adam(lr),
                          metrics=[metrics.rmse, metrics.mae])

    if is_summary:
        DeepSTN_model.summary()

    print('***** pre_F : ', pre_F)
    print('***** conv_F: ', conv_F)
    print('***** R_N   : ', R_N)

    print('***** plus  : ', plus * is_plus)
    print('***** rate  : ', rate * is_plus)

    print('***** P_N   : ', P_N * is_pt)
    print('***** T_F   : ', T_F * is_pt)
    print('***** PT_F  : ', PT_F * is_pt * isPT_F)
    print('***** T     : ', T)

    print('***** drop  : ', drop)

    return DeepSTN_model
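A hedged sketch of driving DeepSTN above with dummy tensors; it assumes the snippet, together with its PT_trans, Res_plus, Res_normal and conv_unit* helpers, is importable, and the shapes follow the Input() definitions in the function.

import numpy as np

model = DeepSTN(H=21, W=12, channel=2, c=3, p=4, t=4,
                is_pt=True, P_N=6, T=24, is_summary=False)
flow = np.random.rand(8, 2 * (3 + 4 + 4), 21, 12).astype('float32')  # (all_channel, H, W)
poi = np.random.rand(8, 6, 21, 12).astype('float32')                 # (P_N, H, W)
time = np.random.rand(8, 24 + 7, 21, 12).astype('float32')           # (T + 7, H, W)
preds = model.predict([flow, poi, time])  # -> (8, 21, 12, 2)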
Example #17
def define_nmt(hidden_size, batch_size, en_timesteps, en_vsize, sp_timesteps,
               sp_vsize):
    """ Defining a NMT model """

    # Define an input sequence and process it.

    if batch_size:
        encoder_inputs = Input(batch_shape=(batch_size, en_timesteps,
                                            en_vsize),
                               name='encoder_inputs')
        decoder_inputs = Input(batch_shape=(batch_size, sp_timesteps - 1,
                                            sp_vsize),
                               name='decoder_inputs')

    else:
        encoder_inputs = Input(shape=(en_timesteps, en_vsize),
                               name='encoder_inputs')
        decoder_inputs = Input(shape=(sp_timesteps - 1, sp_vsize),
                               name='decoder_inputs')

    # Encoder GRU

    encoder_gru = GRU(hidden_size,
                      return_sequences=True,
                      return_state=True,
                      name='encoder_gru')
    encoder_out, encoder_state = encoder_gru(encoder_inputs)

    # Set up the decoder GRU, using `encoder_state` as the initial state.
    decoder_gru = GRU(hidden_size,
                      return_sequences=True,
                      return_state=True,
                      name='decoder_gru')
    decoder_out, decoder_state = decoder_gru(decoder_inputs,
                                             initial_state=encoder_state)

    # Attention layer
    attn_layer = AttentionLayer(name='attention_layer')
    attn_out, attn_states = attn_layer([encoder_out, decoder_out])

    # Concat attention input and decoder GRU output
    decoder_concat_input = Concatenate(
        axis=-1, name='concat_layer')([decoder_out, attn_out])

    # Dense layer

    dense = Dense(sp_vsize, activation='softmax', name='softmax_layer')
    dense_time = TimeDistributed(dense, name='time_distributed_layer')
    decoder_pred = dense_time(decoder_concat_input)

    # Full model
    full_model = Model(inputs=[encoder_inputs, decoder_inputs],
                       outputs=decoder_pred)
    full_model.compile(optimizer='adam',
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])
    full_model.summary()
    """ Inference model """

    batch_size = 1
    """ Encoder (Inference) model """

    encoder_inf_inputs = Input(batch_shape=(batch_size, en_timesteps,
                                            en_vsize),
                               name='encoder_inf_inputs')
    encoder_inf_out, encoder_inf_state = encoder_gru(encoder_inf_inputs)
    encoder_model = Model(inputs=encoder_inf_inputs,
                          outputs=[encoder_inf_out, encoder_inf_state])
    """ Decoder (Inference) model """
    decoder_inf_inputs = Input(batch_shape=(batch_size, 1, sp_vsize),
                               name='decoder_word_inputs')
    encoder_inf_states = Input(batch_shape=(batch_size, en_timesteps,
                                            hidden_size),
                               name='encoder_inf_states')
    decoder_init_state = Input(batch_shape=(batch_size, hidden_size),
                               name='decoder_init')

    decoder_inf_out, decoder_inf_state = decoder_gru(
        decoder_inf_inputs, initial_state=decoder_init_state)
    attn_inf_out, attn_inf_states = attn_layer(
        [encoder_inf_states, decoder_inf_out])
    decoder_inf_concat = Concatenate(
        axis=-1, name='concat')([decoder_inf_out, attn_inf_out])
    decoder_inf_pred = TimeDistributed(dense)(decoder_inf_concat)

    decoder_model = Model(
        inputs=[encoder_inf_states, decoder_init_state, decoder_inf_inputs],
        outputs=[decoder_inf_pred, attn_inf_states, decoder_inf_state])

    return full_model, encoder_model, decoder_model
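A hedged sketch exercising define_nmt above with toy sizes and random data; vocabulary sizes and sequence lengths are arbitrary. Note the teacher-forcing offset: the decoder consumes the target shifted by one timestep, matching the sp_timesteps - 1 input shape.

import numpy as np

full_model, encoder_model, decoder_model = define_nmt(
    hidden_size=96, batch_size=None,
    en_timesteps=20, en_vsize=30,
    sp_timesteps=20, sp_vsize=30)

en_seq = np.random.rand(16, 20, 30).astype('float32')
sp_seq = np.random.rand(16, 20, 30).astype('float32')
# decoder input: target without the last step; label: target without the first step
full_model.fit([en_seq, sp_seq[:, :-1, :]], sp_seq[:, 1:, :], epochs=1)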
Example #18
    def __build_model(self):
        ''' Construct the model used to train the chatbot. '''
        encoder_inputs = Input(shape=(self.max_encoder_seq_length,
                                      self.num_encoder_tokens),
                               name='encoder_input')
        encoder_dropout = (TimeDistributed(
            Dropout(rate=DROPOUT_RATE, name='encoder_dropout'),
            name='time_distributed_dropout_encoder'))(encoder_inputs)
        encoder = GRU(LATENT_DIM,
                      return_sequences=True,
                      return_state=True,
                      name='encoder_gru')

        encoder_outputs, encoder_state = encoder(encoder_dropout)

        decoder_inputs = Input(shape=(self.max_decoder_seq_length,
                                      self.num_decoder_tokens),
                               name='decoder_input')
        decoder_dropout = (TimeDistributed(
            Dropout(rate=DROPOUT_RATE, name='decoder_dropout'),
            name='time_distributed_dropout_decoder'))(decoder_inputs)

        decoder_gru = GRU(LATENT_DIM,
                          return_sequences=True,
                          return_state=True,
                          name='decoder_gru')
        decoder_outputs, _ = decoder_gru(decoder_dropout,
                                         initial_state=encoder_state)

        # Attention mechanism
        attn_layer = AttentionLayer(name='attention_layer')
        attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])
        decoder_outputs = Concatenate(
            axis=-1, name='concat_layer')([decoder_outputs, attn_out])

        decoder_dense = Dense(self.num_decoder_tokens,
                              activation='softmax',
                              name='decoder_activation_softmax')
        decoder_outputs = TimeDistributed(
            decoder_dense,
            name='time_distributed_dense_activation')(decoder_outputs)

        self.model = Model(inputs=[encoder_inputs, decoder_inputs],
                           outputs=[decoder_outputs])
        self.model.compile(optimizer='rmsprop',
                           loss='categorical_crossentropy')
        self.model.summary()

        self.encoder_model = Model(inputs=encoder_inputs,
                                   outputs=[encoder_outputs, encoder_state])

        decoder_inf_inputs = Input(batch_shape=(None, 1,
                                                self.num_decoder_tokens),
                                   name='decoder_word_inputs')
        encoder_inf_states = Input(batch_shape=(None,
                                                self.max_encoder_seq_length,
                                                LATENT_DIM),
                                   name='encoder_inf_states')
        decoder_init_state = Input(batch_shape=(None, LATENT_DIM),
                                   name='decoder_init')

        decoder_outputs, decoder_state = decoder_gru(
            decoder_inf_inputs, initial_state=decoder_init_state)
        attn_out, attn_states = attn_layer(
            [encoder_inf_states, decoder_outputs])
        decoder_outputs = Concatenate(
            axis=-1, name='concat')([decoder_outputs, attn_out])
        decoder_outputs = TimeDistributed(decoder_dense)(decoder_outputs)

        self.decoder_model = Model(
            inputs=[
                encoder_inf_states, decoder_init_state, decoder_inf_inputs
            ],
            outputs=[decoder_outputs, attn_states, decoder_state])
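A hedged sketch of greedy decoding with the two inference models built above; bot stands for an instance on which __build_model has already run, and the start-of-sequence token index is an assumption.

import numpy as np

def greedy_decode(bot, one_hot_input, start_idx=0, max_len=20):
    # one_hot_input: (1, max_encoder_seq_length, num_encoder_tokens)
    enc_out, state = bot.encoder_model.predict(one_hot_input)
    target = np.zeros((1, 1, bot.num_decoder_tokens))
    target[0, 0, start_idx] = 1.0  # assumed start-of-sequence token
    decoded = []
    for _ in range(max_len):
        probs, _, state = bot.decoder_model.predict([enc_out, state, target])
        idx = int(np.argmax(probs[0, -1]))
        decoded.append(idx)
        target = np.zeros((1, 1, bot.num_decoder_tokens))
        target[0, 0, idx] = 1.0  # feed the prediction back in
    return decoded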
Example #19
def Deeplabv3(weights='pascal_voc',
              input_tensor=None,
              input_shape=(512, 512, 3),
              classes=21,
              backbone='mobilenetv2',
              OS=16,
              alpha=1.,
              activation=None):
    """ Instantiates the Deeplabv3+ architecture

    Optionally loads weights pre-trained
    on PASCAL VOC or Cityscapes. This model is available for TensorFlow only.
    # Arguments
        weights: one of 'pascal_voc' (pre-trained on pascal voc),
            'cityscapes' (pre-trained on cityscape) or None (random initialization)
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: shape of the input image, in HxWxC format.
            The PASCAL VOC model was trained on (512,512,3) images. None is allowed for height/width
        classes: number of desired classes. PASCAL VOC has 21 classes, Cityscapes has 19 classes.
            If the number of classes does not match the weights used, the last layer is initialized randomly
        backbone: backbone to use. one of {'xception','mobilenetv2'}
        activation: optional activation to add to the top of the network.
            One of 'softmax', 'sigmoid' or None
        OS: determines input_shape/feature_extractor_output ratio. One of {8,16}.
            Used only for xception backbone.
        alpha: controls the width of the MobileNetV2 network. This is known as the
            width multiplier in the MobileNetV2 paper.
                - If `alpha` < 1.0, proportionally decreases the number
                    of filters in each layer.
                - If `alpha` > 1.0, proportionally increases the number
                    of filters in each layer.
                - If `alpha` = 1, default number of filters from the paper
                    are used at each layer.
            Used only for mobilenetv2 backbone. Pretrained weights are only available for alpha=1.

    # Returns
        A Keras model instance.

    # Raises
        RuntimeError: If attempting to run this model with a
            backend that does not support separable convolutions.
        ValueError: in case of invalid argument for `weights` or `backbone`

    """

    if not (weights in {'pascal_voc', 'cityscapes', None}):
        raise ValueError(
            'The `weights` argument should be either '
            '`None` (random initialization), `pascal_voc`, or `cityscapes` '
            '(pre-trained on PASCAL VOC)')

    if not (backbone in {'xception', 'mobilenetv2'}):
        raise ValueError('The `backbone` argument should be either '
                         '`xception`  or `mobilenetv2` ')

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        img_input = input_tensor

    if backbone == 'xception':
        if OS == 8:
            entry_block3_stride = 1
            middle_block_rate = 2  # ! Not mentioned in paper, but required
            exit_block_rates = (2, 4)
            atrous_rates = (12, 24, 36)
        else:
            entry_block3_stride = 2
            middle_block_rate = 1
            exit_block_rates = (1, 2)
            atrous_rates = (6, 12, 18)

        x = Conv2D(32, (3, 3),
                   strides=(2, 2),
                   name='entry_flow_conv1_1',
                   use_bias=False,
                   padding='same')(img_input)
        x = BatchNormalization(name='entry_flow_conv1_1_BN')(x)
        x = Activation('relu')(x)

        x = _conv2d_same(x, 64, 'entry_flow_conv1_2', kernel_size=3, stride=1)
        x = BatchNormalization(name='entry_flow_conv1_2_BN')(x)
        x = Activation('relu')(x)

        x = _xception_block(x, [128, 128, 128],
                            'entry_flow_block1',
                            skip_connection_type='conv',
                            stride=2,
                            depth_activation=False)
        x, skip1 = _xception_block(x, [256, 256, 256],
                                   'entry_flow_block2',
                                   skip_connection_type='conv',
                                   stride=2,
                                   depth_activation=False,
                                   return_skip=True)

        x = _xception_block(x, [728, 728, 728],
                            'entry_flow_block3',
                            skip_connection_type='conv',
                            stride=entry_block3_stride,
                            depth_activation=False)
        for i in range(16):
            x = _xception_block(x, [728, 728, 728],
                                'middle_flow_unit_{}'.format(i + 1),
                                skip_connection_type='sum',
                                stride=1,
                                rate=middle_block_rate,
                                depth_activation=False)

        x = _xception_block(x, [728, 1024, 1024],
                            'exit_flow_block1',
                            skip_connection_type='conv',
                            stride=1,
                            rate=exit_block_rates[0],
                            depth_activation=False)
        x = _xception_block(x, [1536, 1536, 2048],
                            'exit_flow_block2',
                            skip_connection_type='none',
                            stride=1,
                            rate=exit_block_rates[1],
                            depth_activation=True)

    else:
        OS = 8
        first_block_filters = _make_divisible(32 * alpha, 8)
        x = Conv2D(first_block_filters,
                   kernel_size=3,
                   strides=(2, 2),
                   padding='same',
                   use_bias=False,
                   name='Conv')(img_input)
        x = BatchNormalization(epsilon=1e-3, momentum=0.999, name='Conv_BN')(x)
        x = Activation(relu6, name='Conv_Relu6')(x)

        x = _inverted_res_block(x,
                                filters=16,
                                alpha=alpha,
                                stride=1,
                                expansion=1,
                                block_id=0,
                                skip_connection=False)

        x = _inverted_res_block(x,
                                filters=24,
                                alpha=alpha,
                                stride=2,
                                expansion=6,
                                block_id=1,
                                skip_connection=False)
        x = _inverted_res_block(x,
                                filters=24,
                                alpha=alpha,
                                stride=1,
                                expansion=6,
                                block_id=2,
                                skip_connection=True)

        x = _inverted_res_block(x,
                                filters=32,
                                alpha=alpha,
                                stride=2,
                                expansion=6,
                                block_id=3,
                                skip_connection=False)
        x = _inverted_res_block(x,
                                filters=32,
                                alpha=alpha,
                                stride=1,
                                expansion=6,
                                block_id=4,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=32,
                                alpha=alpha,
                                stride=1,
                                expansion=6,
                                block_id=5,
                                skip_connection=True)

        # stride in block 6 changed from 2 -> 1, so we need to use rate = 2
        x = _inverted_res_block(
            x,
            filters=64,
            alpha=alpha,
            stride=1,  # 1!
            expansion=6,
            block_id=6,
            skip_connection=False)
        x = _inverted_res_block(x,
                                filters=64,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=7,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=64,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=8,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=64,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=9,
                                skip_connection=True)

        x = _inverted_res_block(x,
                                filters=96,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=10,
                                skip_connection=False)
        x = _inverted_res_block(x,
                                filters=96,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=11,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=96,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=12,
                                skip_connection=True)

        x = _inverted_res_block(
            x,
            filters=160,
            alpha=alpha,
            stride=1,
            rate=2,  # 1!
            expansion=6,
            block_id=13,
            skip_connection=False)
        x = _inverted_res_block(x,
                                filters=160,
                                alpha=alpha,
                                stride=1,
                                rate=4,
                                expansion=6,
                                block_id=14,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=160,
                                alpha=alpha,
                                stride=1,
                                rate=4,
                                expansion=6,
                                block_id=15,
                                skip_connection=True)

        x = _inverted_res_block(x,
                                filters=320,
                                alpha=alpha,
                                stride=1,
                                rate=4,
                                expansion=6,
                                block_id=16,
                                skip_connection=False)

    # end of feature extractor

    # branching for Atrous Spatial Pyramid Pooling

    # Image Feature branch
    shape_before = tf.shape(x)
    b4 = GlobalAveragePooling2D()(x)
    # from (b_size, channels)->(b_size, 1, 1, channels)
    b4 = Lambda(lambda x: K.expand_dims(x, 1))(b4)
    b4 = Lambda(lambda x: K.expand_dims(x, 1))(b4)
    b4 = Conv2D(256, (1, 1),
                padding='same',
                use_bias=False,
                name='image_pooling')(b4)
    b4 = BatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
    b4 = Activation('relu')(b4)
    # upsample; tf.compat.v1 is needed because of the align_corners option
    size_before = tf.keras.backend.int_shape(x)
    b4 = Lambda(lambda x: tf.compat.v1.image.resize(
        x, size_before[1:3], method='bilinear', align_corners=True))(b4)
    # simple 1x1
    b0 = Conv2D(256, (1, 1), padding='same', use_bias=False, name='aspp0')(x)
    b0 = BatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
    b0 = Activation('relu', name='aspp0_activation')(b0)

    # there are only 2 branches in mobilenetV2. not sure why
    if backbone == 'xception':
        # rate = 6 (12)
        b1 = SepConv_BN(x,
                        256,
                        'aspp1',
                        rate=atrous_rates[0],
                        depth_activation=True,
                        epsilon=1e-5)
        # rate = 12 (24)
        b2 = SepConv_BN(x,
                        256,
                        'aspp2',
                        rate=atrous_rates[1],
                        depth_activation=True,
                        epsilon=1e-5)
        # rate = 18 (36)
        b3 = SepConv_BN(x,
                        256,
                        'aspp3',
                        rate=atrous_rates[2],
                        depth_activation=True,
                        epsilon=1e-5)

        # concatenate ASPP branches & project
        x = Concatenate()([b4, b0, b1, b2, b3])
    else:
        x = Concatenate()([b4, b0])

    x = Conv2D(256, (1, 1),
               padding='same',
               use_bias=False,
               name='concat_projection')(x)
    x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
    x = Activation('relu')(x)
    x = Dropout(0.1)(x)
    # DeepLab v.3+ decoder

    if backbone == 'xception':
        # Feature projection
        # x4 (x2) block
        size_before2 = tf.keras.backend.int_shape(x)
        x = Lambda(lambda xx: tf.compat.v1.image.resize(
            xx, skip1.shape[1:3], method='bilinear', align_corners=True))(x)

        dec_skip1 = Conv2D(48, (1, 1),
                           padding='same',
                           use_bias=False,
                           name='feature_projection0')(skip1)
        dec_skip1 = BatchNormalization(name='feature_projection0_BN',
                                       epsilon=1e-5)(dec_skip1)
        dec_skip1 = Activation('relu')(dec_skip1)
        x = Concatenate()([x, dec_skip1])
        x = SepConv_BN(x,
                       256,
                       'decoder_conv0',
                       depth_activation=True,
                       epsilon=1e-5)
        x = SepConv_BN(x,
                       256,
                       'decoder_conv1',
                       depth_activation=True,
                       epsilon=1e-5)

    # you can use it with an arbitrary number of classes
    if (weights == 'pascal_voc' and classes == 21) or (weights == 'cityscapes'
                                                       and classes == 19):
        last_layer_name = 'logits_semantic'
    else:
        last_layer_name = 'custom_logits_semantic'  # only the size of the last layer changes

    x = Conv2D(classes, (1, 1), padding='same', name=last_layer_name)(x)
    size_before3 = tf.keras.backend.int_shape(img_input)
    x = Lambda(lambda xx: tf.compat.v1.image.resize(
        xx,  # this line resizes back to the original size
        size_before3[1:3],
        method='bilinear',
        align_corners=True))(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    if activation in {'softmax', 'sigmoid'}:
        x = tf.keras.layers.Activation(activation)(x)

    model = Model(inputs, x, name='deeplabv3plus')
    # The end is like a classification model: it outputs 19 maps of 512x512, each representing a mask.
    # load weights

    if weights == 'pascal_voc':
        if backbone == 'xception':
            weights_path = get_file(
                'deeplabv3_xception_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH_X,
                cache_subdir='models')
        else:
            weights_path = get_file(
                'deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH_MOBILE,
                cache_subdir='models')
        model.load_weights(weights_path, by_name=True)
    elif weights == 'cityscapes':
        if backbone == 'xception':  # cache_subdir on the lines below must be an absolute path
            weights_path = get_file(
                'deeplabv3_xception_tf_dim_ordering_tf_kernels_cityscapes.h5',
                WEIGHTS_PATH_X_CS,
                cache_dir='models',  # directory where the file is cached
                cache_subdir='.')  # no further subdirectory
        else:
            weights_path = get_file(
                'deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels_cityscapes.h5',
                WEIGHTS_PATH_MOBILE_CS,
                cache_dir='models',  # directory where the file is cached
                cache_subdir='.')
        model.load_weights(weights_path, by_name=True)
    return model
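A hedged sketch of building the model above without pre-trained weights and pushing one dummy image through it; the sizes follow the defaults in the signature.

import numpy as np

model = Deeplabv3(weights=None, input_shape=(512, 512, 3),
                  classes=21, backbone='mobilenetv2', activation='softmax')
dummy = np.zeros((1, 512, 512, 3), dtype='float32')
masks = model.predict(dummy)         # -> (1, 512, 512, 21) per-class maps
labels = np.argmax(masks, axis=-1)   # -> (1, 512, 512) label image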
Example #20
    def __init__(self, name, model_settings):
        self.sess = tf.get_default_session()
        assert (self.sess is not None)
        self.name = name
        self.s_dim = model_settings['state_dim']
        self.state_bound = model_settings['state_bound']
        self.g_dim = model_settings['goal_dim']
        self.goal_bound = model_settings['goal_bound']
        self.a_dim = model_settings['action_dim']
        self.action_bound = model_settings['action_bound']
        self.learning_rate = model_settings['model_dynamics_learning_rate']
        self.tau = model_settings['actor_tau']
        self.batch_size = model_settings['minibatch_size']
        self.state_goal_gamma = 0.5
        self.se_cos_gamma = 1.0

        y_max = [y[1] for y in self.state_bound]
        y_min = [y[0] for y in self.state_bound]
        self._k_state = 2. / (np.subtract(y_max, y_min))
        self._b_state = 1. - np.array(y_max) * self._k_state

        y_max = [y[1] for y in self.goal_bound]
        y_min = [y[0] for y in self.goal_bound]
        self._k_goal = 2. / (np.subtract(y_max, y_min))
        self._b_goal = 1. - np.array(y_max) * self._k_goal
        # Model Dynamics Network
        with tf.variable_scope(self.name + '_model_dynamics'):
            self.inputs_state,\
            self.inputs_goal,\
            self.inputs_action,\
            self.state_out,\
            self.scaled_state_out,\
            self.goal_out,\
            self.scaled_goal_out\
                = self.create_model_dynamics_network()
            self.network_params = tf.trainable_variables(scope=self.name +
                                                         '_model_dynamics')

        self.ground_truth_state_out = tf.placeholder(tf.float32,
                                                     [None, self.s_dim])
        self.ground_truth_goal_out = tf.placeholder(tf.float32,
                                                    [None, self.g_dim])
        self.ground_truth_out = Concatenate()(
            [self.ground_truth_state_out, self.ground_truth_goal_out])

        self.scaled_out = Concatenate()(
            [self.scaled_state_out, self.scaled_goal_out])

        # Loss Ops
        self.state_mse_loss = tf.losses.mean_squared_error(
            self.ground_truth_state_out, self.scaled_state_out)
        self.state_cos_loss = tf.losses.cosine_distance(
            tf.nn.l2_normalize(self.ground_truth_state_out - self.inputs_state,
                               axis=1),
            tf.nn.l2_normalize(self.scaled_state_out - self.inputs_state,
                               axis=1),
            axis=1)
        self.state_loss = tf.add(
            self.state_mse_loss * self.se_cos_gamma,
            self.state_cos_loss * (1. - self.se_cos_gamma))

        self.goal_mse_loss = tf.losses.mean_squared_error(
            self.ground_truth_goal_out, self.scaled_goal_out)
        self.goal_cos_loss = tf.losses.cosine_distance(
            tf.nn.l2_normalize(self.ground_truth_goal_out - self.inputs_goal,
                               axis=1),
            tf.nn.l2_normalize(self.scaled_goal_out - self.inputs_goal,
                               axis=1),
            axis=1)
        self.goal_loss = tf.add(self.goal_mse_loss * self.se_cos_gamma,
                                self.goal_cos_loss * (1. - self.se_cos_gamma))

        # self.loss = tf.add(self.state_loss * self.state_goal_gamma, self.goal_loss * (1. - self.state_goal_gamma))
        self.loss = self.state_loss + self.goal_loss

        # Optimize op
        self.optimize = tf.train.GradientDescentOptimizer(
            self.learning_rate).minimize(self.loss)

        # Action gradients extraction
        self.policy_goal_loss = tf.reduce_mean(
            tf.squared_difference(self.ground_truth_goal_out,
                                  self.scaled_goal_out))
        self.action_grads = tf.gradients(self.policy_goal_loss,
                                         self.inputs_action)
        # Real action gradients
        self.ground_truth_actions = tf.placeholder(tf.float32,
                                                   [None, self.a_dim])
        self.policy_action_loss = tf.reduce_mean(
            tf.squared_difference(self.ground_truth_actions,
                                  self.inputs_action))
        self.ground_truth_action_grads = tf.gradients(self.policy_action_loss,
                                                      self.inputs_action)

        self.num_trainable_vars = len(self.network_params)
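A hedged sketch of constructing this network in TF1 graph mode (as tf.get_default_session implies); the class name ModelDynamics and the concrete settings values are assumptions, but the dictionary keys mirror those read in __init__.

import tensorflow as tf

settings = {
    'state_dim': 8,  'state_bound': [(-1.0, 1.0)] * 8,
    'goal_dim': 3,   'goal_bound': [(-1.0, 1.0)] * 3,
    'action_dim': 2, 'action_bound': [(-1.0, 1.0)] * 2,
    'model_dynamics_learning_rate': 1e-3,
    'actor_tau': 0.01,
    'minibatch_size': 64,
}

sess = tf.Session()
with sess.as_default():
    dynamics = ModelDynamics('agent0', settings)  # hypothetical class name
    sess.run(tf.global_variables_initializer())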
Example #21
    def prediction(self, image_input):
        l2_reg = l2(10**(-9))
        default_init = glorot_normal(seed=None)

        def se_net(in_block, depth):
            x = GlobalAveragePooling2D()(in_block)
            x = Dense(depth // 16,
                      activation='relu',
                      kernel_initializer=default_init,
                      bias_initializer='zeros')(x)
            x = Dense(depth, activation='sigmoid',
                      kernel_regularizer=l2_reg)(x)
            return Multiply()([in_block, x])

        # single image frame processing
        # entry
        conv1 = Conv2D(32,
                       kernel_size=3,
                       strides=2,
                       padding="same",
                       activation="relu",
                       kernel_initializer=default_init,
                       name="initial_3x3_conv_1")(image_input)
        conv1 = Conv2D(32,
                       kernel_size=3,
                       strides=1,
                       padding="same",
                       activation="relu",
                       kernel_initializer=default_init,
                       name="initial_3x3_conv_2")(conv1)
        conv1 = Conv2D(32,
                       kernel_size=3,
                       strides=1,
                       padding="same",
                       activation="relu",
                       activity_regularizer=l2_reg,
                       kernel_initializer=default_init,
                       name="initial_3x3_conv_3")(conv1)
        conv1_pool = MaxPooling2D(2,
                                  strides=2,
                                  padding='valid',
                                  name='stem_pool_1')(conv1)
        conv1_3 = Conv2D(32,
                         kernel_size=3,
                         strides=2,
                         padding='same',
                         activation='relu',
                         kernel_initializer=default_init,
                         name='conv1_reduced_1')(conv1)
        conv1 = Concatenate()([conv1_3, conv1_pool])
        conv1_3 = Conv2D(64,
                         kernel_size=1,
                         strides=1,
                         padding='same',
                         activation='relu',
                         kernel_initializer=default_init,
                         name='stem_3x3_pre_conv')(conv1)
        conv1_3 = Conv2D(96,
                         kernel_size=3,
                         strides=1,
                         padding='same',
                         activation='relu',
                         activity_regularizer=l2_reg,
                         kernel_initializer=default_init,
                         name='stem_3x3_conv')(conv1_3)
        conv1_7 = Conv2D(64,
                         kernel_size=1,
                         strides=1,
                         padding='same',
                         activation='relu',
                         kernel_initializer=default_init,
                         name='stem_7x7_pre_conv')(conv1)
        conv1_7 = Conv2D(64,
                         kernel_size=[7, 1],
                         strides=1,
                         padding='same',
                         activation='relu',
                         kernel_initializer=default_init,
                         name='stem_7x7_conv_factor_1')(conv1_7)
        conv1_7 = Conv2D(64,
                         kernel_size=[1, 7],
                         strides=1,
                         padding='same',
                         activation='relu',
                         kernel_initializer=default_init,
                         name='stem_7x7_conv_factor_2')(conv1_7)
        conv1_7 = Conv2D(96,
                         kernel_size=3,
                         strides=1,
                         padding='same',
                         activation='relu',
                         activity_regularizer=l2_reg,
                         kernel_initializer=default_init,
                         name='stem_7x7_post_conv')(conv1_7)
        conv1 = Concatenate()([conv1_3, conv1_7])
        conv1_pool = MaxPooling2D(2,
                                  strides=2,
                                  padding='valid',
                                  name='stem_pool_2')(conv1)
        conv1_3 = Conv2D(192,
                         kernel_size=3,
                         strides=2,
                         padding='same',
                         activation='relu',
                         kernel_initializer=default_init,
                         name='conv1_reduced_2')(conv1)
        conv1 = Concatenate()([conv1_3, conv1_pool])
        conv1 = se_net(conv1, depth=384)

        # middle flow
        # Inception-Resnet Block A
        depth = 384
        conv2 = conv1
        for i in range(3):
            conv1 = Conv2D(depth,
                           kernel_size=1,
                           strides=1,
                           padding='valid',
                           activity_regularizer=l2_reg,
                           kernel_initializer=default_init,
                           name='block_A_base_{}'.format(i))(conv1)

            conv2_1 = Conv2D(128,
                             kernel_size=1,
                             strides=1,
                             padding='same',
                             activation='relu',
                             kernel_initializer=default_init,
                             name='block_A_1x1_conv_{}'.format(i))(conv2)

            conv2_3 = Conv2D(64,
                             kernel_size=1,
                             strides=1,
                             padding='same',
                             activation='relu',
                             kernel_initializer=default_init,
                             name='block_A_3x3_pre_conv_{}'.format(i))(conv2)
            conv2_3 = Conv2D(128,
                             kernel_size=3,
                             strides=1,
                             padding='same',
                             activation='relu',
                             kernel_initializer=default_init,
                             name='block_A_3x3_conv_{}'.format(i))(conv2_3)

            conv2_7 = Conv2D(32,
                             kernel_size=1,
                             strides=1,
                             padding='same',
                             activation='relu',
                             kernel_initializer=default_init,
                             name='block_A_7x7_pre_conv_1_{}'.format(i))(conv2)
            conv2_7 = Conv2D(
                64,
                kernel_size=3,
                strides=1,
                padding='same',
                activation='relu',
                kernel_initializer=default_init,
                name='block_A_7x7_pre_conv_2_{}'.format(i))(conv2_7)
            conv2_7 = Conv2D(128,
                             kernel_size=3,
                             strides=1,
                             padding='same',
                             activation='relu',
                             kernel_initializer=default_init,
                             name='block_A_7x7_conv_{}'.format(i))(conv2_7)

            res_conv = Concatenate()([conv2_1, conv2_3, conv2_7])
            res_conv = Conv2D(
                depth,
                kernel_size=1,
                strides=1,
                padding='same',
                activation='linear',
                kernel_initializer=default_init,
                name='block_A_res_conv_projection_{}'.format(i))(res_conv)

            conv1 = Add(name="block_A_final_add_{}".format(i))(
                [res_conv, conv1])
            conv1 = se_net(conv1, depth=depth)
            conv2 = conv1

        # Inception-Resnet Reduction A
        conv2_pool = MaxPooling2D(2,
                                  strides=2,
                                  padding='valid',
                                  name='red_A_pool_2')(conv2)
        conv2_3 = Conv2D(depth,
                         kernel_size=3,
                         strides=2,
                         padding='same',
                         activation='relu',
                         kernel_initializer=default_init,
                         name='red_A_conv_3')(conv2)
        conv2_7 = Conv2D(256,
                         kernel_size=1,
                         strides=1,
                         padding='same',
                         activation='relu',
                         kernel_initializer=default_init,
                         name='red_A_pre_conv_7')(conv2)
        conv2_7 = Conv2D(256,
                         kernel_size=3,
                         strides=1,
                         padding='same',
                         activation='relu',
                         kernel_initializer=default_init,
                         name='red_A_conv_7_factor_1')(conv2_7)
        conv2_7 = Conv2D(depth,
                         kernel_size=3,
                         strides=2,
                         padding='same',
                         activation='relu',
                         kernel_initializer=default_init,
                         name='red_A_conv_7_factor_2')(conv2_7)
        conv2 = Concatenate()([conv2_pool, conv2_3, conv2_7])

        # Inception-Resnet Block B
        depth = 1024
        conv1 = conv2
        for i in range(3):
            conv1 = Conv2D(depth,
                           kernel_size=1,
                           strides=1,
                           padding='valid',
                           activity_regularizer=l2_reg,
                           kernel_initializer=default_init,
                           name='block_B_base_{}'.format(i))(conv1)

            conv2_1 = Conv2D(192,
                             kernel_size=1,
                             strides=1,
                             padding='same',
                             activation='relu',
                             kernel_initializer=default_init,
                             name='block_B_1x1_conv_{}'.format(i))(conv2)

            conv2_7 = Conv2D(128,
                             kernel_size=1,
                             strides=1,
                             padding='same',
                             activation='relu',
                             kernel_initializer=default_init,
                             name='block_B_7x7_pre_conv_1_{}'.format(i))(conv2)
            conv2_7 = Conv2D(
                160,
                kernel_size=[1, 7],
                strides=1,
                padding='same',
                activation='relu',
                kernel_initializer=default_init,
                name='block_B_7x7_pre_conv_2_{}'.format(i))(conv2_7)
            conv2_7 = Conv2D(192,
                             kernel_size=[7, 1],
                             strides=1,
                             padding='same',
                             activation='relu',
                             kernel_initializer=default_init,
                             name='block_B_7x7_conv_{}'.format(i))(conv2_7)

            res_conv = Concatenate()([conv2_1, conv2_7])
            res_conv = Conv2D(
                depth,
                kernel_size=1,
                strides=1,
                padding='same',
                activation='linear',
                kernel_initializer=default_init,
                name='block_B_res_conv_projection_{}'.format(i))(res_conv)

            conv1 = Add(name="block_B_final_add_{}".format(i))(
                [res_conv, conv1])

            conv1 = se_net(conv1, depth=depth)
            conv2 = conv1

        # Inception-Resnet Reduction B
        conv2_pool = MaxPooling2D(3,
                                  strides=2,
                                  padding='same',
                                  name='red_B_pool_2')(conv2)
        conv2_3_1 = Conv2D(256,
                           kernel_size=1,
                           strides=1,
                           padding='same',
                           activation='relu',
                           kernel_initializer=default_init,
                           name='red_B_pre_conv_3_1')(conv2)
        conv2_3_1 = Conv2D(384,
                           kernel_size=3,
                           strides=2,
                           padding='same',
                           activity_regularizer=l2_reg,
                           activation='relu',
                           kernel_initializer=default_init,
                           name='red_B_conv_3_1')(conv2_3_1)
        conv2_3_2 = Conv2D(256,
                           kernel_size=1,
                           strides=1,
                           padding='same',
                           activation='relu',
                           kernel_initializer=default_init,
                           name='red_B_pre_conv_3_2')(conv2)
        conv2_3_2 = Conv2D(288,
                           kernel_size=3,
                           strides=2,
                           padding='same',
                           activity_regularizer=l2_reg,
                           activation='relu',
                           kernel_initializer=default_init,
                           name='red_B_conv_3_2')(conv2_3_2)
        conv2_7 = Conv2D(256,
                         kernel_size=1,
                         strides=1,
                         padding='same',
                         activation='relu',
                         kernel_initializer=default_init,
                         name='red_B_pre_conv_7')(conv2)
        conv2_7 = Conv2D(288,
                         kernel_size=3,
                         strides=1,
                         padding='same',
                         activation='relu',
                         kernel_initializer=default_init,
                         name='red_B_conv_7_factor_1')(conv2_7)
        conv2_7 = Conv2D(320,
                         kernel_size=3,
                         strides=2,
                         padding='same',
                         activity_regularizer=l2_reg,
                         activation='relu',
                         kernel_initializer=default_init,
                         name='red_B_conv_7_factor_2')(conv2_7)
        conv2 = Concatenate()([conv2_pool, conv2_3_1, conv2_3_2, conv2_7])

        # exit
        return conv2
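A hedged sketch wrapping the prediction() feature extractor above into a standalone Keras model; net stands for an instance of the (unnamed) enclosing class, and the input size is an arbitrary choice that survives the stem's stride-2 stages.

from keras.layers import Input
from keras.models import Model

image_input = Input(shape=(256, 256, 3))
features = net.prediction(image_input)  # net: instance of the class above
backbone = Model(image_input, features)
backbone.summary()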
Example #22
0
def InceptionResNetV2(include_top=True,
                      weights='imagenet',
                      input_tensor=None,
                      input_shape=None,
                      pooling=None,
                      classes=1000):
    """Instantiates the Inception-ResNet v2 architecture.
    Optionally loads weights pre-trained on ImageNet.
    Note that when using TensorFlow, for best performance you should
    set `"image_data_format": "channels_last"` in your Keras config
    at `~/.keras/keras.json`.
    The model and the weights are compatible with TensorFlow, Theano and
    CNTK backends. The data format convention used by the model is
    the one specified in your Keras config file.
    Note that the default input image size for this model is 299x299, instead
    of 224x224 as in the VGG16 and ResNet models. Also, the input preprocessing
    function is different (i.e., do not use `imagenet_utils.preprocess_input()`
    with this model; use `preprocess_input()` defined in this module instead).
    # Arguments
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is `False` (otherwise the input shape
            has to be `(299, 299, 3)` (with `'channels_last'` data format)
            or `(3, 299, 299)` (with `'channels_first'` data format)).
            It should have exactly 3 input channels,
            and width and height should be no smaller than 139.
            E.g. `(150, 150, 3)` would be one valid value.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the last convolutional layer.
            - `'avg'` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `'max'` means that global max pooling will be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is `True`, and
            if no `weights` argument is specified.
    # Returns
        A Keras `Model` instance.
    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """
    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as imagenet with `include_top`'
                         ' as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=299,
                                      min_size=139,
                                      data_format=K.image_data_format(),
                                      require_flatten=False,
                                      weights=weights)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # Stem block: 35 x 35 x 192
    x = conv2d_bn(img_input, 32, 3, strides=2, padding='same')
    x = conv2d_bn(x, 32, 3, padding='same')
    x = conv2d_bn(x, 64, 3)
    x = MaxPooling2D(3, strides=2, padding='same')(x)
    x = conv2d_bn(x, 80, 1, padding='same')
    x = conv2d_bn(x, 192, 3, padding='same')
    x = MaxPooling2D(3, strides=2, padding='same')(x)

    # Mixed 5b (Inception-A block): 35 x 35 x 320
    branch_0 = conv2d_bn(x, 96, 1)
    branch_1 = conv2d_bn(x, 48, 1)
    branch_1 = conv2d_bn(branch_1, 64, 5)
    branch_2 = conv2d_bn(x, 64, 1)
    branch_2 = conv2d_bn(branch_2, 96, 3)
    branch_2 = conv2d_bn(branch_2, 96, 3)
    branch_pool = AveragePooling2D(3, strides=1, padding='same')(x)
    branch_pool = conv2d_bn(branch_pool, 64, 1)
    branches = [branch_0, branch_1, branch_2, branch_pool]
    channel_axis = 1 if K.image_data_format() == 'channels_first' else 3
    x = Concatenate(axis=channel_axis, name='mixed_5b')(branches)

    # 10x block35 (Inception-ResNet-A block): 35 x 35 x 320
    for block_idx in range(1, 11):
        x = inception_resnet_block(x,
                                   scale=0.17,
                                   block_type='block35',
                                   block_idx=block_idx)

    # Mixed 6a (Reduction-A block): 17 x 17 x 1088
    branch_0 = conv2d_bn(x, 384, 3, strides=2, padding='same')
    branch_1 = conv2d_bn(x, 256, 1)
    branch_1 = conv2d_bn(branch_1, 256, 3)
    branch_1 = conv2d_bn(branch_1, 384, 3, strides=2, padding='same')
    branch_pool = MaxPooling2D(3, strides=2, padding='same')(x)
    branches = [branch_0, branch_1, branch_pool]
    x = Concatenate(axis=channel_axis, name='mixed_6a')(branches)

    # 20x block17 (Inception-ResNet-B block): 17 x 17 x 1088
    for block_idx in range(1, 21):
        x = inception_resnet_block(x,
                                   scale=0.1,
                                   block_type='block17',
                                   block_idx=block_idx)

    # Mixed 7a (Reduction-B block): 8 x 8 x 2080
    branch_0 = conv2d_bn(x, 256, 1)
    branch_0 = conv2d_bn(branch_0, 384, 3, strides=2, padding='same')
    branch_1 = conv2d_bn(x, 256, 1)
    branch_1 = conv2d_bn(branch_1, 288, 3, strides=2, padding='same')
    branch_2 = conv2d_bn(x, 256, 1)
    branch_2 = conv2d_bn(branch_2, 288, 3)
    branch_2 = conv2d_bn(branch_2, 320, 3, strides=2, padding='same')
    branch_pool = MaxPooling2D(3, strides=2, padding='same')(x)
    branches = [branch_0, branch_1, branch_2, branch_pool]
    x = Concatenate(axis=channel_axis, name='mixed_7a')(branches)

    # 10x block8 (Inception-ResNet-C block): 8 x 8 x 2080
    for block_idx in range(1, 10):
        x = inception_resnet_block(x,
                                   scale=0.2,
                                   block_type='block8',
                                   block_idx=block_idx)
    x = inception_resnet_block(x,
                               scale=1.,
                               activation=None,
                               block_type='block8',
                               block_idx=10)

    # Final convolution block: 8 x 8 x 1536
    x = conv2d_bn(x, 1536, 1, name='conv_7b')

    if include_top:
        # Classification block
        x = GlobalAveragePooling2D(name='avg_pool')(x)
        x = Dense(classes, activation='softmax', name='predictions')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model
    model = Model(inputs, x, name='inception_resnet_v2')

    # Load weights
    if weights == 'imagenet':
        if K.image_data_format() == 'channels_first':
            if K.backend() == 'tensorflow':
                warnings.warn('You are using the TensorFlow backend, yet you '
                              'are using the Theano '
                              'image data format convention '
                              '(`image_data_format="channels_first"`). '
                              'For best performance, set '
                              '`image_data_format="channels_last"` in '
                              'your Keras config '
                              'at ~/.keras/keras.json.')
        if include_top:
            fname = 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5'
            weights_path = get_file(
                fname,
                BASE_WEIGHT_URL + fname,
                cache_subdir='models',
                file_hash='e693bd0210a403b3192acc6073ad2e96')
        else:
            fname = 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5'
            weights_path = get_file(
                fname,
                BASE_WEIGHT_URL + fname,
                cache_subdir='models',
                file_hash='d19885ff4a710c122648d3b5c3b684e4')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
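A minimal usage sketch for the function above; the dummy batch, the `weights=None` setting and the call to this module's `preprocess_input()` are illustrative assumptions, not part of the original example:

import numpy as np

# Headless feature extractor with global average pooling.
model = InceptionResNetV2(include_top=False, weights=None,
                          input_shape=(299, 299, 3), pooling='avg')
images = preprocess_input(255.0 * np.random.rand(2, 299, 299, 3))
features = model.predict(images)  # shape: (2, 1536)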
Example #23
0
    def create_embedding_model(self):
        """
        """
        sparse_emb_list = []
        sparse_input_list = []
        merge_input_len = 0
        i = 0
        for sparse_feature in USED_CATEGORY_FEATURES:
            sparse_input = Input(shape=(1, ), name=sparse_feature)
            sparse_input_list.append(sparse_input)
            max_id = self.cat_max[sparse_feature]
            emb_dim = self.emb_dim[i]  #SPARSE_FEATURES[sparse_feature]["emb"]
            i += 1
            sparse_embedding = Embedding(
                max_id + 1,
                emb_dim,
                input_length=1,
                trainable=True,
                name=sparse_feature + "_emb",  #embeddings_regularizer = l1(0)
            )(sparse_input)
            sparse_embedding = Reshape((emb_dim, ))(sparse_embedding)
            sparse_emb_list.append(sparse_embedding)
            merge_input_len += emb_dim

        # for i in range(len(USED_CATEGORY_FEATURES)):
        #     sparse_feature = USED_CATEGORY_FEATURES[i]
        #     if sparse_feature == 'ip':
        #         continue
        #     sparse_input = sparse_input_list[i]
        #     max_id = SPARSE_FEATURES[sparse_feature]["max"]
        #     emb_dim = self.emb_dim[i] #SPARSE_FEATURES[sparse_feature]["emb"]
        #     i += 1
        #     sparse_embedding = Embedding(max_id + 1, emb_dim, input_length = 1, name = sparse_feature + "_emb_trainable")(sparse_input)
        #     sparse_embedding = Reshape((emb_dim,))(sparse_embedding)
        #     sparse_emb_list.append(sparse_embedding)
        #     merge_input_len += emb_dim
        merge_sparse_emb = Concatenate(
            name='merge_sparse_emb_trainable')(sparse_emb_list)

        dense_input = Input(shape=(self.dense_input_len, ))
        norm_dense_input = BatchNormalization(
            name='Dense_BN_trainable')(dense_input)

        desc_seq = Input(shape=(self.max_len[0], ))
        desc_cnn_conc = self.Create_CNN(desc_seq, name_suffix='_desc')
        title_seq = Input(shape=(self.max_len[1], ))
        title_cnn_conc = self.Create_CNN(title_seq, name_suffix='_title')

        merge_input = Concatenate(name='merge_input_trainable')(
            [merge_sparse_emb, norm_dense_input, desc_cnn_conc, title_cnn_conc])
        dense_output = self.full_connect_layer(merge_input)
        deep_pre_sigmoid = Dense(
            1, name='deep_pre_sigmoid_trainable')(dense_output)

        proba = Activation('sigmoid', name='proba_trainable')(
            deep_pre_sigmoid)  #Add()([wide_pre_sigmoid, deep_pre_sigmoid]))

        # desc_seq and title_seq must be model inputs: their CNN features
        # (desc_cnn_conc, title_cnn_conc) feed merge_input above, otherwise
        # Keras raises a "graph disconnected" error.
        model = Model(sparse_input_list + [dense_input, desc_seq, title_seq],
                      proba)
        model.compile(optimizer='adam',
                      loss='mean_squared_error')  #, metrics = ['accuracy'])

        # k_model = load_model('../Data/model_allSparse_09763.h5')
        # print (k_model.summary())
        # model.load_weights('../Data/model_allSparse_09763.h5', by_name=True)

        return model
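A standalone sketch of the sparse-embedding pattern this method builds; the feature names, vocabulary sizes and embedding dims below are hypothetical stand-ins for `self.cat_max` and `self.emb_dim`:

from tensorflow.keras.layers import Input, Embedding, Reshape, Concatenate

inputs, emb_list = [], []
for feat, max_id, emb_dim in [('os', 100, 8), ('channel', 50, 8)]:  # hypothetical features
    inp = Input(shape=(1,), name=feat)
    emb = Embedding(max_id + 1, emb_dim, input_length=1,
                    name=feat + '_emb')(inp)          # (None, 1, emb_dim)
    emb_list.append(Reshape((emb_dim,))(emb))         # (None, emb_dim)
    inputs.append(inp)
merged = Concatenate(name='merge_sparse_emb')(emb_list)  # (None, sum of emb dims)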
Example #24
0
def inception_resnet_block(x, scale, block_type, block_idx, activation='relu'):
    """Adds a Inception-ResNet block.
    This function builds 3 types of Inception-ResNet blocks mentioned
    in the paper, controlled by the `block_type` argument (which is the
    block name used in the official TF-slim implementation):
        - Inception-ResNet-A: `block_type='block35'`
        - Inception-ResNet-B: `block_type='block17'`
        - Inception-ResNet-C: `block_type='block8'`
    # Arguments
        x: input tensor.
        scale: scaling factor to scale the residuals (i.e., the output of
            passing `x` through an inception module) before adding them
            to the shortcut branch. If `r` is the output of the residual branch,
            the output of this block will be `x + scale * r`.
        block_type: `'block35'`, `'block17'` or `'block8'`, determines
            the network structure in the residual branch.
        block_idx: an `int` used for generating layer names. The Inception-ResNet blocks
            are repeated many times in this network. We use `block_idx` to identify
            each of the repetitions. For example, the first Inception-ResNet-A block
            will have `block_type='block35', block_idx=0`, and the layer names will have
            a common prefix `'block35_0'`.
        activation: activation function to use at the end of the block
            (see [activations](../activations.md)).
            When `activation=None`, no activation is applied
            (i.e., "linear" activation: `a(x) = x`).
    # Returns
        Output tensor for the block.
    # Raises
        ValueError: if `block_type` is not one of `'block35'`,
            `'block17'` or `'block8'`.
    """
    if block_type == 'block35':
        branch_0 = conv2d_bn(x, 32, 1)
        branch_1 = conv2d_bn(x, 32, 1)
        branch_1 = conv2d_bn(branch_1, 32, 3)
        branch_2 = conv2d_bn(x, 32, 1)
        branch_2 = conv2d_bn(branch_2, 48, 3)
        branch_2 = conv2d_bn(branch_2, 64, 3)
        branches = [branch_0, branch_1, branch_2]
    elif block_type == 'block17':
        branch_0 = conv2d_bn(x, 192, 1)
        branch_1 = conv2d_bn(x, 128, 1)
        branch_1 = conv2d_bn(branch_1, 160, [1, 7])
        branch_1 = conv2d_bn(branch_1, 192, [7, 1])
        branches = [branch_0, branch_1]
    elif block_type == 'block8':
        branch_0 = conv2d_bn(x, 192, 1)
        branch_1 = conv2d_bn(x, 192, 1)
        branch_1 = conv2d_bn(branch_1, 224, [1, 3])
        branch_1 = conv2d_bn(branch_1, 256, [3, 1])
        branches = [branch_0, branch_1]
    else:
        raise ValueError('Unknown Inception-ResNet block type. '
                         'Expects "block35", "block17" or "block8", '
                         'but got: ' + str(block_type))

    block_name = block_type + '_' + str(block_idx)
    channel_axis = 1 if K.image_data_format() == 'channels_first' else 3
    mixed = Concatenate(axis=channel_axis,
                        name=block_name + '_mixed')(branches)
    up = conv2d_bn(mixed,
                   K.int_shape(x)[channel_axis],
                   1,
                   activation=None,
                   use_bias=True,
                   name=block_name + '_conv')

    x = Lambda(lambda inputs, scale: inputs[0] + inputs[1] * scale,
               output_shape=K.int_shape(x)[1:],
               arguments={'scale': scale},
               name=block_name)([x, up])

    if activation is not None:
        x = Activation(activation, name=block_name + '_ac')(x)
    return x
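As in the InceptionResNetV2 example above, the block is stacked by incrementing `block_idx`; a sketch for the Inception-ResNet-A stage:

# x: a 35 x 35 x 320 feature map, e.g. the output of the 'mixed_5b' concat.
for block_idx in range(1, 11):
    x = inception_resnet_block(x, scale=0.17,
                               block_type='block35', block_idx=block_idx)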
Example #25
0
def svhn_complicated_ensemble(input_shape=None,
                              input_tensor=None,
                              n_classes=None,
                              weights_path: Union[None, str] = None) -> Model:
    """
    Defines an SVHN network.

    :param n_classes: used in order to be compatible with the main script.
    :param input_shape: the input shape of the network. Can be omitted if input_tensor is used.
    :param input_tensor: the input tensor of the network. Can be omitted if input_shape is used.
    :param weights_path: a path to a trained custom network's weights.
    :return: Keras functional API Model.
    """
    output_list = []
    inputs = create_inputs(input_shape, input_tensor)

    # Define a weight decay for the regularisation.
    weight_decay = 1e-4

    # Submodel 1.
    # Block1.
    x1 = Conv2D(32, (3, 3),
                padding='same',
                activation='elu',
                name='submodel1_block1_conv1',
                kernel_regularizer=l2(weight_decay))(inputs)
    x1 = BatchNormalization(name='submodel1_block1_batch-norm')(x1)
    x1 = Conv2D(32, (3, 3),
                padding='same',
                activation='elu',
                name='submodel1_block1_conv2',
                kernel_regularizer=l2(weight_decay))(x1)
    x1 = MaxPooling2D(pool_size=(2, 2), name='submodel1_block1_pool')(x1)
    x1 = Dropout(0.2, name='submodel1_block1_dropout', seed=0)(x1)

    # Block2
    x1 = Conv2D(64, (3, 3),
                padding='same',
                activation='elu',
                name='submodel1_block2_conv1',
                kernel_regularizer=l2(weight_decay))(x1)
    x1 = BatchNormalization(name='submodel1_block2_batch-norm')(x1)
    x1 = Conv2D(64, (3, 3),
                padding='same',
                activation='elu',
                name='submodel1_block2_conv2',
                kernel_regularizer=l2(weight_decay))(x1)
    x1 = MaxPooling2D(pool_size=(2, 2), name='submodel1_block2_pool')(x1)
    x1 = Dropout(0.4, name='submodel1_block2_dropout', seed=0)(x1)

    # Add Submodel 1 top layers.
    x1 = Flatten(name='submodel1_flatten')(x1)
    outputs1 = Dense(2, name='submodel1_output')(x1)
    # Crop outputs1 in order to create the first submodel's output.
    outputs_first_submodel = Crop(1, 0, 1,
                                  name='first_class_submodel')(outputs1)
    output_list.append(outputs_first_submodel)

    # Submodel 2.
    x2 = Conv2D(32, (3, 3),
                padding='same',
                activation='elu',
                name='submodel2_conv1',
                kernel_regularizer=l2(weight_decay))(inputs)
    x2 = BatchNormalization(name='submodel2_batch-norm')(x2)
    x2 = Conv2D(32, (3, 3),
                padding='same',
                activation='elu',
                name='submodel2_conv2',
                kernel_regularizer=l2(weight_decay))(x2)
    x2 = MaxPooling2D(pool_size=(2, 2), name='submodel2_pool')(x2)
    x2 = Dropout(0.3, name='submodel2_dropout', seed=0)(x2)

    # Add Submodel 2 top layers.
    x2 = Flatten(name='submodel2_flatten')(x2)
    outputs2 = Dense(3, name='submodel2_output')(x2)

    # Average the predictions for the second class of the first two submodels.
    averaged_class_2 = Average(name='averaged_second_class')(
        [Crop(1, 1, 2)(outputs1),
         Crop(1, 0, 1)(outputs2)])
    # Crop outputs2 in order to create the third class output.
    outputs_class3 = Crop(1, 1, 2, name='third_class')(outputs2)
    # Concatenate classes outputs in order to create the second submodel's output.
    outputs_second_submodel = Concatenate(name='second_submodel')(
        [averaged_class_2, outputs_class3])
    output_list.append(outputs_second_submodel)

    # Submodel 3.
    x3 = Conv2D(64, (3, 3),
                padding='same',
                activation='elu',
                name='submodel3_conv1',
                kernel_regularizer=l2(weight_decay))(inputs)
    x3 = BatchNormalization(name='submodel3_batch-norm')(x3)
    x3 = Conv2D(64, (3, 3),
                padding='same',
                activation='elu',
                name='submodel3_conv2',
                kernel_regularizer=l2(weight_decay))(x3)
    x3 = MaxPooling2D(pool_size=(2, 2), name='submodel3_pool')(x3)
    x3 = Dropout(0.3, name='submodel3_dropout', seed=0)(x3)

    # Add Submodel 3 top layers.
    x3 = Flatten(name='submodel3_flatten')(x3)
    outputs3 = Dense(3, name='submodel3_output')(x3)

    # Average the predictions for the fourth class of the last two submodels.
    averaged_class_4 = Average(name='averaged_fourth_class')(
        [Crop(1, 2, 3)(outputs2),
         Crop(1, 0, 1)(outputs3)])
    # Crop outputs3 in order to create the fifth and sixth class outputs.
    outputs_class5 = Crop(1, 1, 2, name='fifth_class')(outputs3)
    outputs_class6 = Crop(1, 2, 3, name='sixth_class')(outputs3)
    # Concatenate classes outputs in order to create the third submodel's output.
    outputs_third_submodel = Concatenate(name='third_submodel')(
        [averaged_class_4, outputs_class5, outputs_class6])
    output_list.append(outputs_third_submodel)

    # Submodel 4.
    # Block1.
    x4 = Conv2D(32, (3, 3),
                padding='same',
                activation='elu',
                name='submodel4_block1_conv1',
                kernel_regularizer=l2(weight_decay))(inputs)
    x4 = BatchNormalization(name='submodel4_block1_batch-norm')(x4)
    x4 = Conv2D(32, (3, 3),
                padding='same',
                activation='elu',
                name='submodel4_block1_conv2',
                kernel_regularizer=l2(weight_decay))(x4)
    x4 = MaxPooling2D(pool_size=(2, 2), name='submodel4_block1_pool')(x4)
    x4 = Dropout(0.2, name='submodel4_block1_dropout', seed=0)(x4)

    # Block2
    x4 = Conv2D(64, (3, 3),
                padding='same',
                activation='elu',
                name='submodel4_block2_conv1',
                kernel_regularizer=l2(weight_decay))(x4)
    x4 = BatchNormalization(name='submodel4_block2_batch-norm')(x4)
    x4 = Conv2D(64, (3, 3),
                padding='same',
                activation='elu',
                name='submodel4_block2_conv2',
                kernel_regularizer=l2(weight_decay))(x4)
    x4 = MaxPooling2D(pool_size=(2, 2), name='submodel4_block2_pool')(x4)
    x4 = Dropout(0.4, name='submodel4_block2_dropout', seed=0)(x4)

    # Add Submodel 4 top layers.
    x4 = Flatten(name='submodel4_flatten')(x4)
    outputs4 = Dense(2, name='seventh_eighth_class_submodel4')(x4)
    output_list.append(outputs4)

    # Submodel 5.
    # Block1.
    x5 = Conv2D(32, (3, 3),
                padding='same',
                activation='elu',
                name='submodel5_block1_conv1',
                kernel_regularizer=l2(weight_decay))(inputs)
    x5 = BatchNormalization(name='submodel5_block1_batch-norm')(x5)
    x5 = Conv2D(32, (3, 3),
                padding='same',
                activation='elu',
                name='submodel5_block1_conv2',
                kernel_regularizer=l2(weight_decay))(x5)
    x5 = MaxPooling2D(pool_size=(2, 2), name='submodel5_block1_pool')(x5)
    x5 = Dropout(0.2, name='submodel5_block1_dropout', seed=0)(x5)

    # Block2
    x5 = Conv2D(32, (3, 3),
                padding='same',
                activation='elu',
                name='submodel5_block3_conv1',
                kernel_regularizer=l2(weight_decay))(x5)
    x5 = BatchNormalization(name='submodel5_block2_batch-norm')(x5)
    x5 = Conv2D(32, (3, 3),
                padding='same',
                activation='elu',
                name='submodel5_block3_conv2',
                kernel_regularizer=l2(weight_decay))(x5)
    x5 = MaxPooling2D(pool_size=(2, 2), name='submodel5_block3_pool')(x5)
    x5 = Dropout(0.4, name='submodel5_block2_dropout', seed=0)(x5)

    # Add Submodel 5 top layers.
    x5 = Flatten(name='submodel5_flatten')(x5)
    outputs5 = Dense(2, name='ninth_tenth_class_submodel5')(x5)
    output_list.append(outputs5)

    # Concatenate all class predictions together.
    outputs = Concatenate(name='output')(output_list)
    outputs = Softmax(name='output_softmax')(outputs)

    # Create model.
    model = Model(inputs, outputs, name='svhn_complicated_ensemble')
    # Load weights, if they exist.
    load_weights(weights_path, model)

    return model
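A hedged construction sketch; the 32x32x3 input shape and the compile settings are assumptions, not taken from the original script:

model = svhn_complicated_ensemble(input_shape=(32, 32, 3), n_classes=10)
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()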
Example #26
0
    def get_model(self):
        if not self.model:
            mashup_id_input = Input(shape=(1, ),
                                    dtype='int32',
                                    name='mashup_id_input')
            api_id_input = Input(shape=(1, ),
                                 dtype='int32',
                                 name='api_id_input')
            inputs = [mashup_id_input, api_id_input]

            mashup_text_fea = self.mid2text_fea_layer(
                mashup_id_input)  # (None,1,25)
            api_text_fea = self.aid2text_fea_layer(api_id_input)  # (None,1,25)

            mashup_tag_fea = self.mid2tag_fea_layer(
                mashup_id_input)  # (None,1,25)
            api_tag_fea = self.aid2tag_fea_layer(api_id_input)  # (None,1,25)

            api_implict_emb = self.api_implict_emb_layer(
                api_id_input)  # (None,1,25)

            feature_list = [
                mashup_text_fea, api_text_fea, mashup_tag_fea, api_tag_fea,
                api_implict_emb
            ]

            if self.new_old == 'new' and new_Para.param.need_slt_apis:
                mashup_slt_apis_input = Input(
                    shape=(new_Para.param.slt_item_num, ),
                    dtype='int32',
                    name='slt_api_ids_input')
                inputs.append(mashup_slt_apis_input)

                keys_slt_api_text_feas = self.aid2text_fea_layer(
                    mashup_slt_apis_input)  # (None,3,25)
                keys_slt_api_tag_feas = self.aid2tag_fea_layer(
                    mashup_slt_apis_input)  # (None,3,25)
                keys_slt_api_implict_embs = self.api_implict_emb_layer(
                    mashup_slt_apis_input)  # (None,3,25)

                mask = Lambda(lambda x: K.not_equal(x, self.all_api_num))(
                    mashup_slt_apis_input)  # (?, 3) !!!

                # query_api_text_vec = Lambda(lambda x: tf.expand_dims(x, axis=1))(api_text_fea)  # (?, 50)->(?, 1, 50)
                # query_api_tag_vec = Lambda(lambda x: tf.expand_dims(x, axis=1))(api_tag_fea)
                # query_api_implict_emb = Lambda(lambda x: tf.expand_dims(x, axis=1))(api_implict_emb)

                # Compress the history into a single vector  -> (?, 1, 50)
                text_hist = AttentionSequencePoolingLayer(
                    supports_masking=True)(
                        [api_text_fea, keys_slt_api_text_feas], mask=mask)
                tag_hist = AttentionSequencePoolingLayer(
                    supports_masking=True)(
                        [api_tag_fea, keys_slt_api_tag_feas], mask=mask)
                implict_emb_hist = AttentionSequencePoolingLayer(
                    supports_masking=True)(
                        [api_implict_emb, keys_slt_api_implict_embs],
                        mask=mask)

                feature_list = [
                    mashup_text_fea, api_text_fea, text_hist, mashup_tag_fea,
                    api_tag_fea, tag_hist, api_implict_emb, implict_emb_hist
                ]
                # The DNN does not support masking, so strip masks here.
                feature_list = list(map(NoMask(), feature_list))

            all_features = Concatenate(
                name='all_content_concatenate')(feature_list)
            all_features = Lambda(lambda x: tf.squeeze(x, axis=1))(
                all_features)

            output = DNN(self.predict_fc_unit_nums[:-1])(all_features)
            output = Dense(self.predict_fc_unit_nums[-1],
                           activation='relu',
                           kernel_regularizer=l2(
                               new_Para.param.l2_reg))(output)

            # Output layer
            if new_Para.param.final_activation == 'softmax':
                predict_result = Dense(2,
                                       activation='softmax',
                                       name="prediction")(output)
            elif new_Para.param.final_activation == 'sigmoid':
                predict_result = Dense(1,
                                       activation='sigmoid',
                                       kernel_initializer='lecun_uniform',
                                       name="prediction")(output)

            self.model = Model(inputs=inputs,
                               outputs=[predict_result],
                               name='predict_model')
        return self.model
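The core pattern in this method is DeepCTR's AttentionSequencePoolingLayer with an explicit mask; a self-contained sketch of the shape contract (the import path and call signature depend on the installed DeepCTR version):

from tensorflow.keras.layers import Input
from deepctr.layers.sequence import AttentionSequencePoolingLayer

d, T = 25, 3                    # feature dim and number of selected apis, as in the comments above
query = Input(shape=(1, d))     # the candidate api's feature
keys = Input(shape=(T, d))      # features of the already-selected apis
mask = Input(shape=(T,), dtype='bool')
hist = AttentionSequencePoolingLayer(supports_masking=True)(
    [query, keys], mask=mask)   # (None, 1, d)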
Example #27
0
def get_prediction_model(name, in_shape, include_top, algorithm_instance,
                         num_classes, kwargs):
    if name == "big_fully":
        input_l = Input(in_shape)
        output_l = fully_connected_big(input_l,
                                       include_top=include_top,
                                       **kwargs)
        model = Model(input_l, output_l)
    elif name == "simple_multiclass":
        input_l = Input(in_shape)
        output_l = simple_multiclass(input_l,
                                     include_top=include_top,
                                     **kwargs)
        model = Model(input_l, output_l)
    elif name == "unet_2d_upconv":
        assert algorithm_instance is not None, "no algorithm instance for 2d skip connections found"
        assert algorithm_instance.layer_data is not None, "no layer data for 2d skip connections found"

        first_input = Input(in_shape)
        includes_pooling = algorithm_instance.layer_data[2]

        if includes_pooling:
            x = UpSampling2D((2, 2))(first_input)
        else:
            x = first_input

        inputs_skip = [
            Input(x.shape[1:])
            for x in reversed(algorithm_instance.layer_data[0])
        ]
        inputs_up = [x] + inputs_skip

        model_up_out = upconv_model(
            x.shape[1:],
            down_layers=algorithm_instance.layer_data[0],
            filters=algorithm_instance.layer_data[1],
            num_classes=num_classes)(inputs_up)

        return Model(inputs=[first_input, *inputs_skip], outputs=model_up_out)
    elif name == "unet_3d_upconv":
        assert algorithm_instance is not None, "no algorithm instance for 3d skip connections found"
        assert algorithm_instance.layer_data is not None, "no layer data for 3d skip connections found"

        first_input = Input(in_shape)
        includes_pooling = algorithm_instance.layer_data[2]

        if includes_pooling:
            x = UpSampling3D((2, 2, 2))(first_input)
        else:
            x = first_input

        inputs_skip = [
            Input(x.shape[1:])
            for x in reversed(algorithm_instance.layer_data[0])
        ]
        inputs_up = [x] + inputs_skip

        model_up_out = upconv_model_3d(
            x.shape[1:],
            down_layers=algorithm_instance.layer_data[0],
            filters=algorithm_instance.layer_data[1],
            num_classes=num_classes)(inputs_up)

        return Model(inputs=[first_input, *inputs_skip], outputs=model_up_out)
    elif name == "unet_3d_upconv_patches":
        # This version of the unet3d model creates a separate unet for each patch. Currently unused
        assert algorithm_instance is not None, "no algorithm instance for 3d skip connections found"
        assert algorithm_instance.layer_data is not None, "no layer data for 3d skip connections found"

        n_patches = in_shape[0]
        embed_dim = in_shape[1]

        # combine all predictions from encoders to one layer and split up again
        first_input = Input(in_shape)
        flat = Flatten()(first_input)
        processed_first_input = Dense(n_patches * embed_dim,
                                      activation="relu")(flat)
        processed_first_input = Reshape(
            (n_patches, embed_dim))(processed_first_input)

        # get the first shape of the upconv from the encoder
        # get whether the last layer is a pooling layer
        first_l_shape = algorithm_instance.layer_data[2][0]
        includes_pooling = algorithm_instance.layer_data[2][1]
        units = np.prod(first_l_shape)

        # build small model that selects a small shape from the unified predictions
        model_first_up = Sequential()
        model_first_up.add(Input((embed_dim,)))
        model_first_up.add(Dense(units, activation="relu"))
        model_first_up.add(Reshape(first_l_shape))
        if includes_pooling:
            model_first_up.add(UpSampling3D((2, 2, 2)))

        # apply selection to get input for decoder models
        processed_first_input = TimeDistributed(model_first_up)(
            processed_first_input)

        # prepare decoder
        model_up = upconv_model_3d(
            processed_first_input.shape[2:],
            down_layers=algorithm_instance.layer_data[0],
            filters=algorithm_instance.layer_data[1],
            num_classes=num_classes)

        pred_patches = []
        large_inputs = [first_input]

        for s in reversed(algorithm_instance.layer_data[0]):
            # build the (n_patches,) + per-patch shape tuple explicitly
            large_inputs.append(Input((n_patches,) + tuple(s.shape[1:])))

        for p in range(n_patches):
            y = [
                Lambda(lambda x: x[:, p, :, :, :, :],
                       output_shape=processed_first_input.shape[2:])
            ]
            for s in reversed(algorithm_instance.layer_data[0]):
                y.append(
                    Lambda(lambda x: x[:, p, :, :, :, :],
                           output_shape=s.shape[1:]))

            # the first input has to be processed
            small_inputs = [y[0](processed_first_input)]
            for i in range(1, len(large_inputs)):
                # we can take the rest as is
                small_inputs.append(y[i](large_inputs[i]))

            pred_patches.append(model_up(small_inputs))

        last_out = Concatenate(axis=1)(pred_patches)
        last_out = Reshape((n_patches, ) +
                           model_up.layers[-1].output_shape[1:])(last_out)

        model = Model(inputs=large_inputs, outputs=[last_out])
    elif name == "none":
        return None
    else:
        raise ValueError("model " + name + " not found")

    return model
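A hypothetical call for the simplest branch; what `kwargs` must contain depends on the `fully_connected_big` helper defined elsewhere in this project:

head = get_prediction_model('big_fully', in_shape=(128,), include_top=True,
                            algorithm_instance=None, num_classes=10, kwargs={})
head.summary()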
Example #28
0
    def _model_construction_test(self, is_training=True):
        # Model
        encoder_inputs = Input(shape=(self._seq_len, self._input_dim),
                               name='encoder_input')
        encoder_outputs, enc_state_h, enc_state_c = Residual_enc(
            encoder_inputs,
            rnn_unit=self._rnn_units,
            rnn_depth=self._rnn_layers,
            rnn_dropout=self._drop_out)

        encoder_states = [enc_state_h, enc_state_c]

        decoder_inputs = Input(shape=(None, self._output_dim),
                               name='decoder_input')

        layers_dec, decoder_outputs, dec_state_h, dec_state_c = Residual_dec(
            decoder_inputs,
            rnn_unit=self._rnn_units,
            rnn_depth=self._rnn_layers,
            rnn_dropout=self._drop_out,
            init_states=encoder_states)

        attn_layer = AttentionLayer(input_shape=([
            self._batch_size, self._seq_len, self._rnn_units
        ], [self._batch_size, self._seq_len, self._rnn_units]),
                                    name='attention_layer')
        attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])
        decoder_outputs = Concatenate(
            axis=-1, name='concat_layer')([decoder_outputs, attn_out])

        # dense decoder_outputs
        decoder_dense = Dense(self._output_dim, activation='relu')
        decoder_outputs = decoder_dense(decoder_outputs)

        # Define the model that will turn
        # `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
        model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
        if is_training:
            return model
        else:
            self._logger.info("Load model from: {}".format(self._log_dir))
            model.load_weights(self._log_dir + 'best_model.hdf5')
            model.compile(optimizer=self._optimizer,
                          loss='mse',
                          metrics=['mse', 'mae'])
            # --------------------------------------- Encoder model ----------------------------------------------------
            self.encoder_model = Model(encoder_inputs,
                                       [encoder_outputs] + encoder_states)
            # plot_model(model=self.encoder_model, to_file=self._log_dir + '/encoder.png', show_shapes=True)

            # --------------------------------------- Decoder model ----------------------------------------------------
            decoder_state_input_h = Input(shape=(self._rnn_units, ),
                                          name='decoder_state_input_h')
            decoder_state_input_c = Input(shape=(self._rnn_units, ),
                                          name='decoder_state_input_c')
            decoder_states_inputs = [
                decoder_state_input_h, decoder_state_input_c
            ]

            decoder_outputs, _, _ = layers_dec[0](
                decoder_inputs, initial_state=decoder_states_inputs)
            for i in range(1, self._rnn_layers):
                d_o, dec_state_h, dec_state_c = layers_dec[i](decoder_outputs)
                decoder_outputs = add([decoder_outputs, d_o])

            decoder_states = [dec_state_h, dec_state_c]

            encoder_inf_states = Input(shape=(self._seq_len, self._rnn_units),
                                       name='encoder_inf_states_input')
            attn_out, attn_states = attn_layer(
                [encoder_inf_states, decoder_outputs])

            decoder_outputs = Concatenate(
                axis=-1, name='concat')([decoder_outputs, attn_out])
            decoder_dense = Dense(self._output_dim, activation='relu')
            decoder_outputs = decoder_dense(decoder_outputs)
            self.decoder_model = Model([decoder_inputs, encoder_inf_states] +
                                       decoder_states_inputs,
                                       [decoder_outputs] + decoder_states)

            # plot_model(model=self.decoder_model, to_file=self._log_dir + '/decoder.png', show_shapes=True)
            return model
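For the `is_training=False` path, step-by-step decoding with the encoder/decoder pair built above would look roughly like this sketch (the function name and the zero start token are assumptions):

import numpy as np

def greedy_decode(encoder_model, decoder_model, encoder_input_batch, horizon, output_dim):
    # Encode once, then feed each prediction back in as the next decoder input.
    enc_out, h, c = encoder_model.predict(encoder_input_batch)
    dec_in = np.zeros((encoder_input_batch.shape[0], 1, output_dim))  # start token
    preds = []
    for _ in range(horizon):
        dec_out, h, c = decoder_model.predict([dec_in, enc_out, h, c])
        preds.append(dec_out[:, -1, :])
        dec_in = dec_out[:, -1:, :]
    return np.stack(preds, axis=1)  # (batch, horizon, output_dim)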
Example #29
0
def DIN(dnn_feature_columns,
        history_feature_list,
        embedding_size=8,
        hist_len_max=16,
        dnn_use_bn=False,
        dnn_hidden_units=(200, 80),
        dnn_activation='relu',
        att_hidden_size=(80, 40),
        att_activation="dice",
        att_weight_normalization=False,
        l2_reg_dnn=0,
        l2_reg_embedding=1e-6,
        dnn_dropout=0,
        init_std=0.0001,
        seed=1024,
        task='binary'):
    """Instantiates the Deep Interest Network architecture.

    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
    :param history_feature_list: list, to indicate the sequence sparse fields
    :param embedding_size: positive integer, sparse feature embedding_size.
    :param hist_len_max: positive int, to indicate the max length of the seq input
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the deep net
    :param dnn_hidden_units: list of positive integers or an empty list, the layer number and units in each layer of the deep net
    :param dnn_activation: Activation function to use in the deep net
    :param att_hidden_size: list of positive integers, the layer number and units in each layer of the attention net
    :param att_activation: Activation function to use in the attention net
    :param att_weight_normalization: bool. Whether to normalize the attention scores of the local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float, to use as the initialization std of the embedding vectors
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.

    """

    features = build_input_features(dnn_feature_columns)

    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat),
               dnn_feature_columns)) if dnn_feature_columns else []
    dense_feature_columns = list(
        filter(lambda x: isinstance(x, DenseFeat),
               dnn_feature_columns)) if dnn_feature_columns else []
    varlen_sparse_feature_columns = list(
        filter(lambda x: isinstance(x, VarLenSparseFeat),
               dnn_feature_columns)) if dnn_feature_columns else []

    history_feature_columns = []
    sparse_varlen_feature_columns = []
    history_fc_names = list(map(lambda x: "hist_" + x, history_feature_list))
    for fc in varlen_sparse_feature_columns:
        feature_name = fc.name
        if feature_name in history_fc_names:
            history_feature_columns.append(fc)
        else:
            sparse_varlen_feature_columns.append(fc)

    inputs_list = list(features.values())

    embedding_dict = create_embedding_matrix(dnn_feature_columns,
                                             l2_reg_embedding,
                                             init_std,
                                             seed,
                                             embedding_size,
                                             prefix="")

    query_emb_list = embedding_lookup(embedding_dict, features,
                                      sparse_feature_columns,
                                      history_feature_list,
                                      history_feature_list)  # the query is looked up separately
    keys_emb_list = embedding_lookup(embedding_dict, features,
                                     history_feature_columns, history_fc_names,
                                     history_fc_names)
    dnn_input_emb_list = embedding_lookup(embedding_dict,
                                          features,
                                          sparse_feature_columns,
                                          mask_feat_list=history_feature_list)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    sequence_embed_dict = varlen_embedding_lookup(
        embedding_dict, features, sparse_varlen_feature_columns)
    sequence_embed_list = get_varlen_pooling_list(
        sequence_embed_dict, features, sparse_varlen_feature_columns)
    dnn_input_emb_list += sequence_embed_list

    keys_emb = concat_fun(keys_emb_list)
    deep_input_emb = concat_fun(dnn_input_emb_list)
    query_emb = concat_fun(query_emb_list)

    hist = AttentionSequencePoolingLayer(
        att_hidden_size,
        att_activation,
        weight_normalization=att_weight_normalization,
        supports_masking=True)([query_emb, keys_emb])

    deep_input_emb = Concatenate()([NoMask()(deep_input_emb), hist])
    deep_input_emb = Flatten()(deep_input_emb)
    dnn_input = combined_dnn_input([deep_input_emb], dense_value_list)
    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                 dnn_use_bn, seed)(dnn_input)
    final_logit = Dense(1, use_bias=False)(output)

    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)
    return model
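A hedged construction sketch; the SparseFeat / VarLenSparseFeat import path and constructor signatures vary across DeepCTR releases, so treat the feature columns below as illustrative:

from deepctr.inputs import SparseFeat, VarLenSparseFeat  # path differs in newer releases

feature_columns = [SparseFeat('user', 3), SparseFeat('item', 10),
                   VarLenSparseFeat('hist_item', 10, maxlen=16, combiner='mean')]
model = DIN(feature_columns, history_feature_list=['item'])
model.compile('adam', 'binary_crossentropy')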
Example #30
0
def DCN(
    feature_dim_dict,
    embedding_size='auto',
    cross_num=2,
    hidden_size=[
        128,
        128,
    ],
    l2_reg_embedding=1e-5,
    l2_reg_cross=1e-5,
    l2_reg_deep=0,
    init_std=0.0001,
    seed=1024,
    keep_prob=1,
    use_bn=False,
    activation='relu',
    final_activation='sigmoid',
):
    """Instantiates the Deep&Cross Network architecture.

    :param feature_dim_dict: dict, to indicate the sparse and dense fields, like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param embedding_size: positive int or str, sparse feature embedding_size. If set to "auto", it will be 6*pow(cardinality, 0.25)
    :param cross_num: positive integer, cross layer number
    :param hidden_size: list of positive integers or an empty list, the layer number and units in each layer of the deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors
    :param l2_reg_cross: float. L2 regularizer strength applied to the cross net
    :param l2_reg_deep: float. L2 regularizer strength applied to the deep net
    :param init_std: float, to use as the initialization std of the embedding vectors
    :param seed: integer, to use as random seed.
    :param keep_prob: float in (0,1]. keep_prob used in the deep net
    :param use_bn: bool. Whether to use BatchNormalization before activation in the deep net
    :param activation: Activation function to use in the deep net
    :param final_activation: str, output activation, usually ``'sigmoid'`` or ``'linear'``
    :return: A Keras model instance.

    """
    if len(hidden_size) == 0 and cross_num == 0:
        raise ValueError("Either hidden_layer or cross layer must > 0")
    if not isinstance(
            feature_dim_dict, dict
    ) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict:
        raise ValueError(
            "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}"
        )

    sparse_input, dense_input = get_input(
        feature_dim_dict,
        None,
    )
    sparse_embedding = get_embeddings(feature_dim_dict, embedding_size,
                                      init_std, seed, l2_reg_embedding)
    embed_list = [
        sparse_embedding[i](sparse_input[i]) for i in range(len(sparse_input))
    ]

    deep_input = Flatten()(Concatenate()(embed_list))
    if len(dense_input) > 0:
        if len(dense_input) == 1:
            continuous_list = dense_input[0]
        else:
            continuous_list = Concatenate()(dense_input)

        deep_input = Concatenate()([deep_input, continuous_list])

    if len(hidden_size) > 0 and cross_num > 0:  # Deep & Cross
        deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, use_bn,
                       seed)(deep_input)
        cross_out = CrossNet(cross_num, l2_reg=l2_reg_cross)(deep_input)
        stack_out = Concatenate()([cross_out, deep_out])
        final_logit = Dense(1, use_bias=False, activation=None)(stack_out)
    elif len(hidden_size) > 0:  # Only Deep
        deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, use_bn,
                       seed)(deep_input)
        final_logit = Dense(1, use_bias=False, activation=None)(deep_out)
    elif cross_num > 0:  # Only Cross
        cross_out = CrossNet(cross_num, init_std=init_std,
                             l2_reg=l2_reg_cross)(deep_input)
        final_logit = Dense(1, use_bias=False, activation=None)(cross_out)
    else:  # Error
        raise NotImplementedError

    # Activation(self.final_activation)(final_logit)
    output = PredictionLayer(final_activation)(final_logit)
    model = Model(inputs=sparse_input + dense_input, outputs=output)

    return model
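A usage sketch that follows the feature_dim_dict format documented above; the compile settings are illustrative:

feature_dim_dict = {'sparse': {'field_1': 4, 'field_2': 3, 'field_3': 2},
                    'dense': ['field_4', 'field_5']}
model = DCN(feature_dim_dict, embedding_size=8, cross_num=2,
            hidden_size=[128, 128])
model.compile('adam', 'binary_crossentropy')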