Example #1
 def test_add(self):
     inputs = np.ones((1, 5, 2))
     weights = np.random.random((10, 2))
     weights[1, :] = np.asarray([0.25, 0.1])
     weights[3, :] = np.asarray([0.6, -0.2])
     model = keras.models.Sequential()
     model.add(
         PositionEmbedding(
             input_dim=10,
             output_dim=2,
             mode=PositionEmbedding.MODE_ADD,
             input_shape=(None, 2),
             weights=[weights],
             name='Pos-Embd',
         ))
     model.compile('adam', keras.losses.mae, {})
     model_path = os.path.join(tempfile.gettempdir(),
                               'test_pos_embd_%f.h5' % random.random())
     model.save(model_path)
     model = keras.models.load_model(
         model_path,
         custom_objects={'PositionEmbedding': PositionEmbedding})
     model.summary()
     predicts = model.predict(inputs)
     self.assertTrue(np.allclose([1.25, 1.1], predicts[0][1]), predicts[0])
     self.assertTrue(np.allclose([1.6, 0.8], predicts[0][3]), predicts[0])
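
A note on the assertions: in MODE_ADD the layer adds its position-embedding row i to whatever arrives at time step i, so with all-ones inputs the expected values are 1 + weights[1] = [1.25, 1.1] and 1 + weights[3] = [1.6, 0.8]. A minimal sketch of the same arithmetic without the save/load round trip, assuming keras and keras_pos_embd are installed:

import numpy as np
import keras
from keras_pos_embd import PositionEmbedding

# Rows 1 and 3 are the only non-zero position vectors here.
weights = np.zeros((10, 2))
weights[1] = [0.25, 0.1]
weights[3] = [0.6, -0.2]

model = keras.models.Sequential([
    PositionEmbedding(input_dim=10, output_dim=2,
                      mode=PositionEmbedding.MODE_ADD,
                      weights=[weights], input_shape=(None, 2)),
])
out = model.predict(np.ones((1, 5, 2)))
print(out[0][1], out[0][3])  # ~[1.25 1.1] and ~[1.6 0.8]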
Example #2
def build_model(max_len: int,
                input_dim: int,
                embedding_dim: int,
                feed_forward_units: int,
                head_num=1,
                block_num=1,
                dropout_rate=0.5) -> Tuple[Model, Embedding]:

    inputs = Input(shape=(max_len,))
    emb = Embedding(input_dim=input_dim,
                    output_dim=embedding_dim,
                    mask_zero=True)
    x = emb(inputs)
    pos_emb = PositionEmbedding(input_dim=max_len,
                                output_dim=embedding_dim,
                                mode=PositionEmbedding.MODE_ADD,
                                mask_zero=True)(x)
    y = Dropout(dropout_rate)(pos_emb)

    for _ in range(block_num):
        y = block(y, head_num, feed_forward_units, dropout_rate)

    model = Model(inputs=inputs, outputs=y)

    return model, emb
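
A hypothetical call to build_model above, assuming the imports it relies on (Input, Embedding, Dropout, Model, Tuple) and the block() helper it references are available; the sizes are placeholders:

model, token_embedding = build_model(
    max_len=128,
    input_dim=30000,        # vocabulary size
    embedding_dim=256,
    feed_forward_units=512,
    head_num=4,
    block_num=2,
    dropout_rate=0.1,
)
model.summary()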
Example #3
def get_model(
        n_vocab,
        n_ctx=1024,
        n_embd=768,
        n_head=12,
        n_layer=12,
        fixed_input_shape=False  # needed for TPU training
):
    """Get basic GPT-2 model.

    :param n_vocab: Number of vocabulary tokens.
    :param n_ctx: The length of each input.
    :param n_embd: The dimension of embeddings.
    :param n_head: Number of heads in transformer.
    :param n_layer: Number of transformer blocks.
    :param fixed_input_shape: Whether the length of input is fixed. (Needed for TPU training)
    :return: The model.
    """
    if fixed_input_shape:
        input_layer_shape = (n_ctx, )
    else:
        input_layer_shape = (None, )
    input_layer = keras.layers.Input(shape=input_layer_shape, name='Input')

    embed_token, embeddings = EmbeddingRet(
        input_dim=n_vocab,
        output_dim=n_embd,
        mask_zero=False,
        name='Embed-Token',
    )(input_layer)
    embed_token_pos = PositionEmbedding(
        input_dim=n_ctx,
        output_dim=n_embd,
        mode=PositionEmbedding.MODE_ADD,
        name='Embed-Token-Pos',
    )(embed_token)

    last_layer = embed_token_pos
    for i in range(n_layer):
        last_layer = _get_encoder_component(
            name='Encode-%d' % i,
            input_layer=last_layer,
            head_num=n_head,
            hidden_dim=n_embd * 4,
            attention_activation=None,
            feed_forward_activation=gelu,
        )

    norm_layer = LayerNormalization(name='Norm', )(last_layer)

    output_layer = EmbeddingSim(
        use_bias=False,
        name='Output',
    )([norm_layer, embeddings])

    model = keras.models.Model(inputs=input_layer, outputs=output_layer)
    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=keras.losses.sparse_categorical_crossentropy,
    )
    return model
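
A hypothetical usage of get_model above, assuming the keras-gpt-2 style helpers it references (EmbeddingRet, EmbeddingSim, LayerNormalization, gelu, _get_encoder_component) are importable. The model maps token-id sequences of shape (batch, seq_len) to per-position distributions over the vocabulary:

model = get_model(n_vocab=50257, n_ctx=1024, n_embd=768, n_head=12, n_layer=12)
model.summary()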
Example #4
 def test_mask_zero(self):
     indices = np.asarray([[-4, 10, 100]])
     weights = np.random.random((21, 2))
     weights[6, :] = np.asarray([0.25, 0.1])
     weights[20, :] = np.asarray([0.6, -0.2])
     model = keras.models.Sequential()
     model.add(
         PositionEmbedding(
             input_dim=10,
             output_dim=2,
             mode=PositionEmbedding.MODE_EXPAND,
             mask_zero=100,
             input_shape=(None, ),
             weights=[weights],
             name='Pos-Embd',
         ))
     model.build()
     model.compile('adam', keras.losses.mae, [keras.metrics.mae])
     model_path = os.path.join(tempfile.gettempdir(),
                               'keras_pos_embd_%f.h5' % random.random())
     model.save(model_path)
     model = keras.models.load_model(
         model_path,
         custom_objects={'PositionEmbedding': PositionEmbedding})
     model.summary()
     predicts = model.predict(indices)
     expected = np.asarray([[
         [0.25, 0.1],
         [0.6, -0.2],
         [0.6, -0.2],
     ]])
     self.assertTrue(np.allclose(expected, predicts))
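
The expected values are consistent with how MODE_EXPAND treats its input as position indices: indices are clipped to [-input_dim, input_dim] and shifted by input_dim, so the weight table holds 2 * input_dim + 1 rows (21 here), -4 maps to row 6, and both 10 and 100 map to row 20. A small numpy sketch of that index arithmetic:

import numpy as np

input_dim = 10
indices = np.array([-4, 10, 100])
rows = np.clip(indices, -input_dim, input_dim) + input_dim
print(rows)  # [ 6 20 20] -> weights[6], weights[20], weights[20]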
Example #5
def get_embedding(inputs,
                  token_num,
                  pos_num,
                  embed_dim,
                  dropout_rate=0.1,
                  trainable=True):
    """Get embedding layer.

    See: https://arxiv.org/pdf/1810.04805.pdf

    :param inputs: Input layers.
    :param token_num: Number of tokens.
    :param pos_num: Maximum position.
    :param embed_dim: The dimension of all embedding layers.
    :param dropout_rate: Dropout rate.
    :param trainable: Whether the layers are trainable.
    :return: The merged embedding layer and weights of token embedding.
    """
    embeddings = [
        TokenEmbedding(
            input_dim=token_num,
            output_dim=embed_dim,
            mask_zero=True,
            trainable=trainable,
            name='Embedding-Token',
        )(inputs[0]),
        keras.layers.Embedding(
            input_dim=2,
            output_dim=embed_dim,
            trainable=trainable,
            name='Embedding-Segment',
        )(inputs[1]),
    ]
    embeddings[0], embed_weights = embeddings[0]

    embed_layer = keras.layers.Add(name='Embedding-Token-Segment')(embeddings)
    embed_layer = PositionEmbedding(
        input_dim=pos_num,
        output_dim=embed_dim,
        mode=PositionEmbedding.MODE_ADD,
        trainable=trainable,
        name='Embedding-Position',
    )(embed_layer)
    return embed_layer, embed_weights
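
A minimal sketch of wiring the BERT-style embedding block above into a graph, assuming keras plus the keras-bert style TokenEmbedding and PositionEmbedding layers are importable; the two inputs are token ids and segment ids:

token_input = keras.layers.Input(shape=(None,), name='Input-Token')
segment_input = keras.layers.Input(shape=(None,), name='Input-Segment')
embed_layer, embed_weights = get_embedding(
    inputs=[token_input, segment_input],
    token_num=30000,
    pos_num=512,
    embed_dim=768,
)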
Example #6
def get_embedding(inputs,
                  token_num,
                  pos_num,
                  embed_dim,
                  dropout_rate=0.1,
                  trainable=True):
    """Get embedding layer.

    See: https://arxiv.org/pdf/1810.04805.pdf

    :param inputs: Input layers.
    :param token_num: Number of tokens.
    :param pos_num: Maximum position.
    :param embed_dim: The dimension of all embedding layers.
    :param dropout_rate: Dropout rate.
    :param trainable: Whether the layers are trainable.
    :return: The merged embedding layer and weights of token embedding.
    """
    embeddings = [
        TokenEmbedding(
            input_dim=token_num,
            output_dim=embed_dim,
            mask_zero=True,
            trainable=trainable,
            embeddings_regularizer=keras.regularizers.l2(),
            name='Embedding-Token',
        )(inputs[0]),
        keras.layers.Embedding(
            input_dim=2,
            output_dim=embed_dim,
            trainable=trainable,
            embeddings_regularizer=keras.regularizers.l2(),
            name='Embedding-Segment',
        )(inputs[1]),
    ]
    embeddings[0], embed_weights = embeddings[0]
    embed_layer = keras.layers.Add(name='Embedding-Token-Segment')(embeddings)
    embed_layer = PositionEmbedding(
        input_dim=pos_num,
        output_dim=embed_dim,
        mode=PositionEmbedding.MODE_ADD,
        trainable=trainable,
        embeddings_regularizer=keras.regularizers.l2(),
        name='Embedding-Position',
    )(embed_layer)
    if dropout_rate > 0.0:
        dropout_layer = keras.layers.Dropout(
            rate=dropout_rate,
            name='Embedding-Dropout',
        )(embed_layer)
    else:
        dropout_layer = embed_layer
    norm_layer = LayerNormalization(
        trainable=trainable,
        name='Embedding-Norm',
    )(dropout_layer)
    return norm_layer, embed_weights
Example #7
def deprecated1_my_get_embedding(inputs,
                                 token_num,
                                 pos_num,
                                 embed_dim,
                                 dropout_rate=0.1,
                                 trainable=True):
    """Get embedding layer.

    See: https://arxiv.org/pdf/1810.04805.pdf

    :param inputs: Input layers.
    :param token_num: Number of tokens.
    :param pos_num: Maximum position.
    :param embed_dim: The dimension of all embedding layers.
    :param dropout_rate: Dropout rate.
    :param trainable: Whether the layers are trainable.
    :return: The merged embedding layer and weights of token embedding.
    """
    embeddings = [
        TokenEmbedding(
            input_dim=66,  # number of valid APs
            output_dim=embed_dim,
            mask_zero=True,
            trainable=trainable,
            name='Embedding-AP',
        )(inputs[0]),
        keras.layers.Embedding(
            input_dim=66,  # RSSI takes integer values in (-0, -128 dB]
            output_dim=embed_dim,
            trainable=trainable,
            name='Embedding-RSSI',
            #  name='Embedding-Segment',
        )(inputs[1]),
    ]
    embeddings[0], embed_weights = embeddings[0]
    embed_layer = keras.layers.Add(name='Embedding-AP-RSSI')(embeddings)
    embed_layer = PositionEmbedding(
        input_dim=pos_num,
        output_dim=embed_dim,
        mode=PositionEmbedding.MODE_ADD,
        trainable=trainable,
        name='Embedding-Position',
    )(embed_layer)
    return embed_layer, embed_weights
Example #8
def build_albert(token_num,
                 pos_num=512,
                 seq_len=512,
                 embed_dim=128,
                 hidden_dim=768,
                 transformer_num=12,
                 head_num=12,
                 feed_forward_dim=3072,
                 dropout_rate=0.1,
                 attention_activation=None,
                 feed_forward_activation='gelu',
                 training=True,
                 trainable=None,
                 output_layers=None):
    """Get ALBERT model.
    See: https://arxiv.org/pdf/1909.11942.pdf
    :param token_num: Number of tokens.
    :param pos_num: Maximum position.
    :param seq_len: Maximum length of the input sequence or None.
    :param embed_dim: Dimensions of embeddings.
    :param hidden_dim: Dimensions of hidden layers.
    :param transformer_num: Number of transformers.
    :param head_num: Number of heads in multi-head attention
                    in each transformer.
    :param feed_forward_dim: Dimension of the feed forward layer
                             in each transformer.
    :param dropout_rate: Dropout rate.
    :param attention_activation: Activation for attention layers.
    :param feed_forward_activation: Activation for feed-forward layers.
    :param training: A built model with MLM and NSP outputs will be returned
                     if it is `True`, otherwise the input layers and the last
                     feature extraction layer will be returned.
    :param trainable: Whether the model is trainable.
    :param output_layers: A list of indices of output layers.
    """
    if attention_activation == 'gelu':
        attention_activation = gelu
    if feed_forward_activation == 'gelu':
        feed_forward_activation = gelu
    if trainable is None:
        trainable = training

    def _trainable(_layer):
        if isinstance(trainable, (list, tuple, set)):
            for prefix in trainable:
                if _layer.name.startswith(prefix):
                    return True
            return False
        return trainable

    # Build inputs
    input_token = keras.layers.Input(shape=(seq_len, ), name='Input-Token')
    input_segment = keras.layers.Input(shape=(seq_len, ), name='Input-Segment')
    inputs = [input_token, input_segment]

    # Build embeddings
    embed_token, embed_weights, embed_projection = AdaptiveEmbedding(
        input_dim=token_num,
        output_dim=hidden_dim,
        embed_dim=embed_dim,
        mask_zero=True,
        trainable=trainable,
        return_embeddings=True,
        return_projections=True,
        name='Embed-Token',
    )(input_token)
    embed_segment = keras.layers.Embedding(
        input_dim=2,
        output_dim=hidden_dim,
        trainable=trainable,
        name='Embed-Segment',
    )(input_segment)
    embed_layer = keras.layers.Add(name='Embed-Token-Segment')(
        [embed_token, embed_segment])
    embed_layer = PositionEmbedding(
        input_dim=pos_num,
        output_dim=hidden_dim,
        mode=PositionEmbedding.MODE_ADD,
        trainable=trainable,
        name='Embedding-Position',
    )(embed_layer)

    if dropout_rate > 0.0:
        dropout_layer = keras.layers.Dropout(
            rate=dropout_rate,
            name='Embedding-Dropout',
        )(embed_layer)
    else:
        dropout_layer = embed_layer
    embed_layer = LayerNormalization(
        trainable=trainable,
        name='Embedding-Norm',
    )(dropout_layer)

    # Build shared transformer
    attention_layer = MultiHeadAttention(
        head_num=head_num,
        activation=attention_activation,
        name='Attention',
    )
    attention_normal = LayerNormalization(name='Attention-Normal')
    feed_forward_layer = FeedForward(units=feed_forward_dim,
                                     activation=feed_forward_activation,
                                     name='Feed-Forward')
    feed_forward_normal = LayerNormalization(name='Feed-Forward-Normal')

    transformed = embed_layer
    transformed_layers = []
    for i in range(transformer_num):
        attention_input = transformed
        transformed = attention_layer(transformed)
        if dropout_rate > 0.0:
            transformed = keras.layers.Dropout(
                rate=dropout_rate,
                name='Attention-Dropout-{}'.format(i + 1),
            )(transformed)
        transformed = keras.layers.Add(
            name='Attention-Add-{}'.format(i + 1), )(
                [attention_input, transformed])
        transformed = attention_normal(transformed)

        feed_forward_input = transformed
        transformed = feed_forward_layer(transformed)
        if dropout_rate > 0.0:
            transformed = keras.layers.Dropout(
                rate=dropout_rate,
                name='Feed-Forward-Dropout-{}'.format(i + 1),
            )(transformed)
        transformed = keras.layers.Add(
            name='Feed-Forward-Add-{}'.format(i + 1), )(
                [feed_forward_input, transformed])
        transformed = feed_forward_normal(transformed)
        transformed_layers.append(transformed)

    if training:
        # Build tasks
        mlm_dense_layer = keras.layers.Dense(
            units=hidden_dim,
            activation=feed_forward_activation,
            name='MLM-Dense',
        )(transformed)
        mlm_norm_layer = LayerNormalization(name='MLM-Norm')(mlm_dense_layer)
        mlm_pred_layer = AdaptiveSoftmax(
            input_dim=hidden_dim,
            output_dim=token_num,
            embed_dim=embed_dim,
            bind_embeddings=True,
            bind_projections=True,
            name='MLM-Sim',
        )([mlm_norm_layer, embed_weights, embed_projection])
        masked_layer = Masked(name='MLM')([mlm_pred_layer, inputs[-1]])
        extract_layer = Extract(index=0, name='Extract')(transformed)
        nsp_dense_layer = keras.layers.Dense(
            units=hidden_dim,
            activation='tanh',
            name='SOP-Dense',
        )(extract_layer)
        nsp_pred_layer = keras.layers.Dense(
            units=2,
            activation='softmax',
            name='SOP',
        )(nsp_dense_layer)
        model = keras.models.Model(inputs=inputs,
                                   outputs=[masked_layer, nsp_pred_layer])
        for layer in model.layers:
            layer.trainable = _trainable(layer)
        return model
    if output_layers is not None:
        if isinstance(output_layers, list):
            output_layers = [
                transformed_layers[index] for index in output_layers
            ]
            output = keras.layers.Concatenate(name='Output', )(output_layers)
        else:
            output = transformed_layers[output_layers]
        model = keras.models.Model(inputs=inputs, outputs=output)
        return model
    model = keras.models.Model(inputs=inputs, outputs=transformed)
    for layer in model.layers:
        layer.trainable = _trainable(layer)
    return inputs, transformed
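
A hypothetical call to build_albert above, assuming the adaptive-embedding and transformer layers it imports are available. With training=True the returned model carries the MLM and SOP heads; with training=False (and output_layers=None) the input layers and the last feature layer are returned instead:

pretraining_model = build_albert(token_num=30000, seq_len=128, training=True)
pretraining_model.summary()

inputs, features = build_albert(token_num=30000, seq_len=128,
                                training=False, trainable=False)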
Example #9
 def get_model(self,
               params,
               a=False,
               b=False,
               c=False,
               d=False,
               e=False,
               f=False,
               g=False,
               dropout=0.5):
     hash_input = layers.Input(shape=(params['max_words'], ), dtype='int32')
     x = layers.Embedding(params['hash_mole'],
                          params['embed_size'],
                          input_length=params['max_words'],
                          name=self.embedding_name)(hash_input)
     x = layers.Dropout(dropout / 3)(x)
     if a:
         # did not train
         # needs positional embedding?
         x = MultiHeadAttention(4)(x)
         x = layers.Dropout(dropout / 3)(x)
     if b:
         x = layers.Bidirectional(
             self.get_lstm(params['units'] // 2, return_sequences=True))(x)
         x = layers.Dropout(dropout)(x)
         x = layers.TimeDistributed(MultiHeadAttention(4))(x)
         #x = layers.Flatten()(x)
         #x = layers.Dropout(dropout)(x)
         #x = layers.Dense(params['embed_size'])(x)
         x = layers.Dropout(dropout / 3)(x)
     #if c:
     x = layers.Bidirectional(
         self.get_lstm(params['units'] // 2,
                       return_sequences=False,
                       name=self.bidirectional_name))(x)
     x = layers.Dropout(dropout)(x)
     x = layers.RepeatVector(params['num_sylls'])(x)
     x = layers.Dropout(dropout)(x)
     if d:
         x = PositionEmbedding(input_dim=params['embed_size'],
                               output_dim=params['num_sylls'] * 4,
                               mode=PositionEmbedding.MODE_CONCAT)(x)
         x = layers.Dropout(dropout)(x)
         x = MultiHeadAttention(4)(x)
         x = layers.Dropout(dropout)(x)
     x = self.get_lstm(params['units'],
                       return_sequences=True,
                       name=self.cu_dnnlstm_name)(x)
     if e:
         x = PositionEmbedding(input_dim=params['units'],
                               output_dim=params['units'],
                               mode=PositionEmbedding.MODE_ADD)(x)
         x = layers.Dropout(dropout)(x)
         #x = layers.Dense(params['units'])(x)
         #x = layers.Dropout(dropout)(x)
     if f:
         # this was somewhat effective
         x = MultiHeadAttention(2)(x)
         x = layers.Dropout(dropout)(x)
     if g:
         x = layers.Dense(params['units'],
                          kernel_initializer='identity',
                          name='dense_identity',
                          activation='relu')(x)
         x = layers.Dropout(dropout)(x)
     output_layer = layers.Dense(params['max_features'],
                                 activation='softmax',
                                 name=self.dense_name)(x)
     model = Model(inputs=[hash_input], outputs=[output_layer])
     return model
Example #10
    def __build_model(self):
        print("Building the model...")
        vocab_size = self.preprocessor.get_vocab_size(self.tokenizer)

        if self.context:
            embedding_matrix = self.preprocessor.get_embedding_matrix(self.tokenizer)
            left_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='left_input')
            x1 = Embedding(
                output_dim=EMBEDDING_SIZE, 
                input_dim=vocab_size, 
                input_length=MAX_LENGTH,
                weights=[embedding_matrix],
                trainable=True,
            )(left_input)

            _, pos_size = self.preprocessor.get_pos_dict()  # POS tag vocabulary size used by the one-hot layers below
            pos_left_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='pos_left_input')
            x1p = keras.layers.Lambda(
                K.one_hot, 
                arguments={'num_classes': pos_size}, 
                output_shape=(MAX_LENGTH, pos_size)
            )(pos_left_input)

            target_input = Input(shape=(10,), dtype='int32', name='target_input')
            x2 = Embedding(
                output_dim=EMBEDDING_SIZE, 
                input_dim=vocab_size, 
                input_length=10,
                weights=[embedding_matrix],
                trainable=True,
            )(target_input)

            pos_target_input = Input(shape=(10,), dtype='int32', name='pos_target_input')
            x2p = keras.layers.Lambda(
                K.one_hot, 
                arguments={'num_classes': pos_size}, 
                output_shape=(10, pos_size)
            )(pos_target_input)

            right_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='right_input')
            x3 = Embedding(
                output_dim=EMBEDDING_SIZE, 
                input_dim=vocab_size, 
                input_length=MAX_LENGTH,
                weights=[embedding_matrix],
                trainable=True,
            )(right_input)

            pos_right_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='pos_right_input')
            x3p = keras.layers.Lambda(
                K.one_hot, 
                arguments={'num_classes': pos_size}, 
                output_shape=(MAX_LENGTH, pos_size)
            )(pos_right_input)

            aspect_input = Input(shape=(110,), dtype='int32', name='aspect_input')
            x4 = keras.layers.Lambda(
                K.one_hot,
                arguments={'num_classes': len(ASPECT_LIST)}, 
                output_shape=(110, len(ASPECT_LIST))
            )(aspect_input)

            x = keras.layers.concatenate([x1, x2, x3], axis=1)
            xp = keras.layers.concatenate([x1p, x2p, x3p], axis=1)
            x = keras.layers.concatenate([x, xp, x4])

            x = Bidirectional(LSTM(256, return_sequences=True))(x)
            x = GlobalMaxPool1D()(x)
            x = Dropout(0.5)(x)
            x = Dense(256, activation='relu')(x)
            x = Dropout(0.5)(x)
            out = Dense(2, activation='softmax')(x)

            model = Model([left_input, target_input, right_input, pos_left_input, pos_target_input, pos_right_input, aspect_input], out)

            model.summary()

            model.compile(
                loss='categorical_crossentropy', 
                optimizer='adam', 
                metrics=['acc']
            )

        else:
            if self.embedding:
                embedding_matrix = self.preprocessor.get_embedding_matrix(self.tokenizer)
                main_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='main_input')
                x = Embedding(
                    output_dim=EMBEDDING_SIZE,
                    input_dim=vocab_size,
                    input_length=MAX_LENGTH,
                    weights=[embedding_matrix],
                    trainable=self.trainable_embedding,
                )(main_input)

                aspect_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='aspect_input')
                x2 = keras.layers.Lambda(
                    K.one_hot, 
                    arguments={'num_classes': len(ASPECT_LIST)}, 
                    output_shape=(MAX_LENGTH, len(ASPECT_LIST))
                )(aspect_input)
                x = keras.layers.concatenate([x, x2])

                if self.position_embd:
                    weights = np.random.random((201, 50))
                    position_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='position_input')
                    x2 = PositionEmbedding(
                        input_shape=(MAX_LENGTH,),
                        input_dim=100,    
                        output_dim=50,     
                        weights=[weights],
                        mode=PositionEmbedding.MODE_EXPAND,
                        name='position_embedding',
                    )(position_input)
                    x = keras.layers.concatenate([x, x2])

                if self.use_lexicon:
                    lex_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='lex_input')
                    x3 = keras.layers.Lambda(
                        K.one_hot, 
                        arguments={'num_classes': 3}, 
                        output_shape=(MAX_LENGTH, 3)
                    )(lex_input)
                    x = keras.layers.concatenate([x, x3])

                if self.pos_tag == 'embedding':
                    _, pos_size = self.preprocessor.get_pos_dict()
                    pos_input = Input(shape=(MAX_LENGTH,), dtype='int32', name='pos_input')
                    x4 = keras.layers.Lambda(
                        K.one_hot, 
                        arguments={'num_classes': pos_size}, 
                        output_shape=(MAX_LENGTH, pos_size)
                    )(pos_input)
                    x = keras.layers.concatenate([x, x4])

            else:
                new_embedding_size = EMBEDDING_SIZE + 6
                if self.pos_tag == 'one_hot':
                    new_embedding_size += 27
                if self.dependency is True:
                    new_embedding_size += 2
                print('embedding size: ', new_embedding_size)
                main_input = Input(shape=(MAX_LENGTH, new_embedding_size), name='main_input')

            print("1. Input")
            
            if self.use_rnn is True:
                if self.embedding is True:
                    if self.rnn_type == 'gru':
                        x = Bidirectional(GRU(self.n_neuron, return_sequences=True))(x)
                    else:
                        x = Bidirectional(LSTM(self.n_neuron, return_sequences=True))(x)
                else:
                    if self.rnn_type == 'gru':
                        x = Bidirectional(GRU(self.n_neuron, return_sequences=True))(main_input)
                    else:
                        x = Bidirectional(LSTM(self.n_neuron, return_sequences=True))(main_input)
                # x = GlobalMaxPool1D()(x)
                x = GlobalAvgPool1D()(x)
                x = Dropout(self.dropout)(x)

            print("2. LSTM")

            if self.use_cnn is True:
                pass

            if self.n_dense != 0:
                for i in range(self.n_dense):
                    x = Dense(self.n_neuron, activation='relu')(x)
                    x = Dropout(self.dropout)(x)

            print("3. Dense")

            out = Dense(2, activation='softmax')(x)

            print("4. Out")

            x_input = list()
            x_input.append(main_input)
            x_input.append(aspect_input)

            if self.position_embd:
                x_input.append(position_input)
            if self.use_lexicon:
                x_input.append(lex_input)
            if self.pos_tag == 'embedding':
                x_input.append(pos_input)
            
            model = Model(x_input, out)

        print("5. Model")

        model.summary()
        model.compile(
            loss='categorical_crossentropy',
            optimizer=self.optimizer,
            metrics=['acc']
        )

        print("6. Done")

        return model
Example #11
def get_model(n_vocab,
              n_ctx=1024,
              n_embd=768,
              n_head=12,
              n_layer=12,
              batch_size=None,
              fixed_input_shape=False):
    """Get basic GPT-2 model.

    :param n_vocab: Number of vocabulary tokens.
    :param n_ctx: The length of each input.
    :param n_embd: The dimension of embeddings.
    :param n_head: Number of heads in transformer.
    :param n_layer: Number of transformer blocks.
    :param batch_size: Batch size of the model.
    :param fixed_input_shape: Whether the length of input is fixed. (Needed for TPU training)
    :return: The model.
    """
    if fixed_input_shape:
        input_layer_shape = (batch_size, n_ctx)
    else:
        input_layer_shape = (batch_size, None)

    lm_input_layer = tf.keras.layers.Input(
        batch_shape=input_layer_shape,
        name='LMInput',
    )

    mc_input_layer = tf.keras.layers.Input(
        batch_shape=(batch_size, ),
        name='MCInput',
    )

    embed_token, embeddings = EmbeddingRet(
        input_dim=n_vocab,
        output_dim=n_embd,
        mask_zero=False,
        name='Embed-Token',
    )(lm_input_layer)

    embed_token_pos = PositionEmbedding(
        input_dim=n_ctx,
        output_dim=n_embd,
        mode=PositionEmbedding.MODE_ADD,
        name='Embed-Token-Pos',
    )(embed_token)

    last_layer = embed_token_pos
    for i in range(n_layer):
        last_layer = _get_encoder_component(
            name='Encode-%d' % i,
            input_layer=last_layer,
            head_num=n_head,
            hidden_dim=n_embd * 4,
            attention_activation=None,
            feed_forward_activation=gelu,
        )

    norm_layer = LayerNormalization(name='Norm', )(last_layer)

    lm_head = EmbeddingSim(
        use_bias=False,
        name='LMOutput',
    )([norm_layer, embeddings])

    mc_sequence_summary = SequenceSummary(name='MCSequenceSummary')(
        [norm_layer, mc_input_layer])

    mc_linear = Dense(units=1, input_shape=(n_embd, ),
                      name='MCDense')(mc_sequence_summary)

    mc_head = Dropout(rate=0.1, name='MCOutput')(mc_linear)

    losses = {
        "LMOutput": lm_loss_function,
        "MCOutput": mc_loss_function,
    }
    lossWeights = {"LMOutput": 2.0, "MCOutput": 1.0}

    metrics = {"LMOutput": get_metrics(), 'MCOutput': get_metrics(is_mc=True)}

    model = tf.keras.models.Model(inputs=[lm_input_layer, mc_input_layer],
                                  outputs=[lm_head, mc_head])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(clipnorm=1.),
        loss=losses,
        loss_weights=lossWeights,
        #metrics=metrics
    )
    return model
Example #12
def get_model(num_article,
              num_magazine,
              num_search_keyword,
              article_embedding_matrix,
              negative_sample_size,
              transformer_num=1,
              head_num=10,
              feed_forward_dim=100,
              dropout_rate=0.1,
              attention_activation=None,
              feed_forward_activation=tf.nn.leaky_relu,
              lr=1e-4,
              decay_rate=1e-5,
              inference=False,
              weight_path=None):
    if inference:
        trainable = None
    else:
        trainable = True
    user_inputs = get_user_inputs()
    pos_item_inputs, pos_user_item_inputs = get_item_inputs('pos')
    neg_item_inputs = []
    neg_user_item_inputs = []
    for i in range(negative_sample_size):
        item_inputs, user_item_inputs = get_item_inputs('neg{}'.format(i))
        neg_item_inputs.append(item_inputs)
        neg_user_item_inputs.append(user_item_inputs)

    if trainable:
        article_embedding = keras.layers.Embedding(
            input_dim=num_article,
            output_dim=200,
            weights=[article_embedding_matrix],
            trainable=False,
            name='E-Article',
        )
    else:
        article_embedding = keras.layers.Embedding(
            input_dim=num_article,
            output_dim=200,
            trainable=False,
            name='E-Article',
        )
    magazine_embedding = keras.layers.Embedding(
        input_dim=num_magazine,
        output_dim=43,
        trainable=trainable,
        name='E-Magazine',
    )
    author_embedding = keras.layers.Embedding(
        input_dim=19024,
        output_dim=50,
        trainable=trainable,
        name='E-Author',
    )
    embed_layer = Concatenate(axis=-1, name='UserConcat')(
        [article_embedding(user_inputs[0]), magazine_embedding(user_inputs[1]), author_embedding(user_inputs[2]),
         user_inputs[3]])

    embed_layer = PositionEmbedding(
        input_dim=MAX_USER_SEQUENCE_LEN,
        output_dim=300,
        mode=PositionEmbedding.MODE_ADD,
        trainable=trainable,
        name='E-Position',
    )(embed_layer)

    user_feature = get_transformer(
        encoder_num=transformer_num,
        input_layer=embed_layer,
        head_num=head_num,
        hidden_dim=feed_forward_dim,
        attention_activation=attention_activation,
        feed_forward_activation=feed_forward_activation,
        dropout_rate=dropout_rate,
        trainable=trainable,
        name='User'
    )
    search_keyword_layer = keras.layers.Embedding(
        input_dim=num_search_keyword,
        output_dim=50,
        trainable=trainable,
        name='EMB-SearchKeyword',
    )(user_inputs[4])

    search_keyword_feature = get_transformer(
        encoder_num=transformer_num,
        input_layer=search_keyword_layer,
        head_num=head_num,
        hidden_dim=50,
        attention_activation=attention_activation,
        feed_forward_activation=feed_forward_activation,
        dropout_rate=dropout_rate,
        trainable=trainable,
        name='SK'
    )
    user_embedding = Concatenate(axis=-1, name='UserEmbedding1')([user_feature, search_keyword_feature])
    user_embedding = Dense(200, name='UserEmbedding2', activation=feed_forward_activation)(user_embedding)

    item_layer = Dense(200, name='ItemEmbedding', activation=feed_forward_activation)
    score0 = Dense(1024, name='Scorer0', activation=feed_forward_activation)
    score1 = Dense(512, name='Scorer1', activation=feed_forward_activation)
    score2 = Dense(256, name='Scorer2', activation=feed_forward_activation)
    '''
    if inference:
        final_activation = 'relu'
    else:
        final_activation = 'sigmoid'
    '''
    score3 = Dense(1, name='Scorer3', activation=None)

    def extract_item(inputs):
        target_article = Reshape(target_shape=(200,))(article_embedding(inputs[0]))
        target_magazine = Reshape(target_shape=(43,))(magazine_embedding(inputs[1]))
        target_author = Reshape(target_shape=(50,))(author_embedding(inputs[2]))
        item_feature = Concatenate(axis=-1)([target_article, target_magazine, target_author, inputs[3]])
        item_embedding = item_layer(item_feature)
        return item_embedding

    def scorer(user_embedding, item_embedding, inputs):

        merged = Concatenate(axis=-1)([user_embedding, item_embedding, inputs[0]])
        merged = score0(merged)
        merged = score1(merged)
        merged = score2(merged)
        output = score3(merged)
        return output

    pos_item_embedding = extract_item(pos_item_inputs)
    pos_score = scorer(user_embedding, pos_item_embedding, pos_user_item_inputs)
    neg_scores = []
    for i in range(negative_sample_size):
        neg_item_embedding = extract_item(neg_item_inputs[i])
        score = scorer(user_embedding, neg_item_embedding, neg_user_item_inputs[i])
        neg_scores.append(score)

    output = concatenate([pos_score] + neg_scores)

    inputs = list(user_inputs)
    inputs += pos_item_inputs
    inputs += pos_user_item_inputs
    for i in range(negative_sample_size):
        inputs += neg_item_inputs[i]
        inputs += neg_user_item_inputs[i]
    model = keras.models.Model(inputs=inputs, outputs=output)

    if inference:
        model.load_weights(weight_path)
        user_embed_input = keras.layers.Input(
            shape=(200,),
            name='I-UserEmbedding'
        )
        item_embed_input = keras.layers.Input(
            shape=(200,),
            name='I-ItemEmbedding'
        )
        scorer_inputs = [user_embed_input, item_embed_input] + pos_user_item_inputs
        scorer_output = scorer(user_embed_input, item_embed_input, pos_user_item_inputs)
        scorer_model = keras.models.Model(inputs=scorer_inputs, outputs=scorer_output)
        for layer in scorer_model.layers:
            if len(layer.get_weights()) == 0:
                continue
            try:
                layer.set_weights(model.get_layer(name=layer.name).get_weights())
            except Exception as e:
                print("Could not transfer weights for layer {}".format(layer.name))
                raise e
        user_model = keras.models.Model(inputs=user_inputs, outputs=user_embedding)
        item_model = keras.models.Model(inputs=pos_item_inputs, outputs=pos_item_embedding)
        return user_model, item_model, scorer_model
    else:
        def hinge_loss(y_true, y_pred):
            # hinge loss
            y_pos = y_pred[:, :1]
            y_neg = y_pred[:, 1:]
            loss = K.sum(K.maximum(0., 0.2 - y_pos + y_neg))
            return loss

        model.compile(loss=hinge_loss,
                      optimizer=Adam(lr=lr, decay=decay_rate),
                      metrics=['accuracy'])
        return model
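
The custom hinge_loss above ignores y_true and compares the single positive score (first column of y_pred) against every negative score with a 0.2 margin; a small numpy sketch of the same computation:

import numpy as np

y_pred = np.array([[0.9, 0.5, 0.85]])      # [positive, negative, negative]
y_pos, y_neg = y_pred[:, :1], y_pred[:, 1:]
loss = np.sum(np.maximum(0.0, 0.2 - y_pos + y_neg))
print(loss)  # 0.15: only the 0.85 negative violates the margin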
Example #13
    def __build_model(self, embedding_matrix):
        print("Building the model...")
        vocab_size = self.preprocessor.get_vocab_size(self.tokenizer)

        if self.embedding:
            main_input = Input(shape=(MAX_LENGTH, ),
                               dtype='int32',
                               name='main_input')
            x = Embedding(
                output_dim=EMBEDDING_SIZE,
                input_dim=vocab_size,
                input_length=MAX_LENGTH,
                weights=[embedding_matrix],
                trainable=self.trainable_embedding,
            )(main_input)

            if self.position_embd == True:
                weights = np.random.random((201, 50))
                position_input = Input(shape=(MAX_LENGTH, ),
                                       dtype='int32',
                                       name='position_input')
                x2 = PositionEmbedding(
                    input_shape=(MAX_LENGTH, ),
                    input_dim=100,
                    output_dim=50,
                    weights=[weights],
                    mode=PositionEmbedding.MODE_EXPAND,
                    name='position_embedding',
                )(position_input)
                x = keras.layers.concatenate([x, x2])

            if self.pos_tag == 'embedding':
                _, pos_size = self.preprocessor.get_pos_dict()
                pos_input = Input(shape=(MAX_LENGTH, ),
                                  dtype='int32',
                                  name='pos_input')
                x3 = Lambda(K.one_hot,
                            arguments={'num_classes': pos_size},
                            output_shape=(MAX_LENGTH, pos_size))(pos_input)
                x = keras.layers.concatenate([x, x3])

        else:
            new_embedding_size = EMBEDDING_SIZE
            if self.pos_tag == 'one_hot':
                new_embedding_size += 27
            if self.dependency is True:
                new_embedding_size += 2
            print('embedding size: ', new_embedding_size)
            main_input = Input(shape=(MAX_LENGTH, new_embedding_size),
                               name='main_input')

        print("1. Input")

        if self.use_rnn is True:
            if self.embedding is True:
                if self.rnn_type == 'gru':
                    x = Bidirectional(GRU(self.n_neuron,
                                          return_sequences=True))(x)
                else:
                    x = Bidirectional(
                        LSTM(self.n_neuron, return_sequences=True))(x)
            else:
                if self.rnn_type == 'gru':
                    x = Bidirectional(GRU(self.n_neuron,
                                          return_sequences=True))(main_input)
                else:
                    x = Bidirectional(
                        LSTM(self.n_neuron, return_sequences=True))(main_input)
            x = GlobalMaxPool1D()(x)
            # x = GlobalAvgPool1D()(x)
            x = Dropout(self.dropout)(x)

        print("2. LSTM")

        if self.use_cnn is True:
            pass

        if self.n_dense != 0:
            for i in range(self.n_dense):
                x = Dense(self.n_neuron, activation='relu')(x)
                x = Dropout(self.dropout)(x)

        print("3. Dense")

        out = Dense(len(self.aspects), activation='sigmoid')(x)

        print("4. Out")

        x_input = list()
        x_input.append(main_input)

        if self.position_embd == True:
            x_input.append(position_input)
        if self.pos_tag == 'embedding':
            x_input.append(pos_input)

        model = Model(x_input, out)

        print("5. Model")

        model.summary()
        model.compile(loss='binary_crossentropy',
                      optimizer=self.optimizer,
                      metrics=[f1])

        print("6. Done")

        return model