Example #1
def sepcnn_model(blocks,
                 filters,
                 kernel_size,
                 embedding_dim,
                 dropout_rate,
                 pool_size,
                 input_shape,
                 num_classes,
                 num_features,
                 use_pretrained_embedding=False,
                 is_embedding_trainable=False,
                 embedding_matrix=None):
    """Creates an instance of a separable CNN model.

    # Arguments
        blocks: int, number of pairs of sepCNN and pooling blocks in the model.
        filters: int, output dimension of the layers.
        kernel_size: int, length of the convolution window.
        embedding_dim: int, dimension of the embedding vectors.
        dropout_rate: float, percentage of input to drop at Dropout layers.
        pool_size: int, factor by which to downscale input at MaxPooling layer.
        input_shape: tuple, shape of input to the model.
        num_classes: int, number of output classes.
        num_features: int, number of words (embedding input dimension).
        use_pretrained_embedding: bool, true if pre-trained embedding is on.
        is_embedding_trainable: bool, true if embedding layer is trainable.
        embedding_matrix: np.ndarray, matrix of pre-trained embedding coefficients.

    # Returns
        A sepCNN model instance.
    """
    op_units, op_activation = _get_last_layer_units_and_activation(num_classes)
    model = models.Sequential()

    # Add embedding layer. If a pre-trained embedding is used, initialize the
    # layer's weights from it and set trainable according to the
    # is_embedding_trainable flag.
    if use_pretrained_embedding:
        model.add(
            Embedding(input_dim=num_features,
                      output_dim=embedding_dim,
                      input_length=input_shape[0],
                      weights=[embedding_matrix],
                      trainable=is_embedding_trainable))
    else:
        model.add(
            Embedding(input_dim=num_features,
                      output_dim=embedding_dim,
                      input_length=input_shape[0]))

    for _ in range(blocks - 1):
        model.add(Dropout(rate=dropout_rate))
        model.add(
            SeparableConv1D(filters=filters,
                            kernel_size=kernel_size,
                            activation='relu',
                            bias_initializer='random_uniform',
                            depthwise_initializer='random_uniform',
                            padding='same'))
        model.add(
            SeparableConv1D(filters=filters,
                            kernel_size=kernel_size,
                            activation='relu',
                            bias_initializer='random_uniform',
                            depthwise_initializer='random_uniform',
                            padding='same'))
        model.add(MaxPooling1D(pool_size=pool_size))

    model.add(
        SeparableConv1D(filters=filters * 2,
                        kernel_size=kernel_size,
                        activation='relu',
                        bias_initializer='random_uniform',
                        depthwise_initializer='random_uniform',
                        padding='same'))
    model.add(
        SeparableConv1D(filters=filters * 2,
                        kernel_size=kernel_size,
                        activation='relu',
                        bias_initializer='random_uniform',
                        depthwise_initializer='random_uniform',
                        padding='same'))
    model.add(GlobalAveragePooling1D())
    model.add(Dropout(rate=dropout_rate))
    model.add(Dense(op_units, activation=op_activation))

    return model
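# Usage sketch with hypothetical hyperparameters (not from the original source);
# assumes the same Keras imports used above (models, Embedding, SeparableConv1D, ...)
# are available in scope.
model = sepcnn_model(blocks=2,
                     filters=64,
                     kernel_size=3,
                     embedding_dim=200,
                     dropout_rate=0.2,
                     pool_size=3,
                     input_shape=(500,),   # sequences padded to length 500
                     num_classes=4,
                     num_features=20000)
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['acc'])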
Example #2
# Neural network modeling

# In[42]:

model = Sequential()

# In[43]:

embedding_boyut = 50

# In[44]:

model.add(
    Embedding(input_dim=max_kelime,
              output_dim=embedding_boyut,
              input_length=max_token,
              name='embedding_katman'))

# In[45]:

model.add(CuDNNGRU(units=16, return_sequences=True))
model.add(CuDNNGRU(units=8, return_sequences=True))
model.add(CuDNNGRU(units=4, return_sequences=False))
model.add(Dense(1, activation='sigmoid'))

# In[46]:

optimizer = Adam(lr=1e-3)

# In[47]:
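# Sketch of the likely next step (not shown in the original notebook): compile
# the binary-classification model with the Adam optimizer defined above.
model.compile(loss='binary_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])
model.summary()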
Example #3
print(max_num_tokens)

print(np.sum(total_num_tokens < max_num_tokens) / len(total_num_tokens))
seq_pad = 'pre'
input_train_pad = pad_sequences(input_train_tokens, maxlen=max_num_tokens, padding=seq_pad, truncating=seq_pad)
input_test_pad = pad_sequences(input_test_tokens, maxlen=max_num_tokens, padding=seq_pad, truncating=seq_pad) 

print(input_train_pad.shape)

print(input_train_pad[1])

embedding_layer_size = 8

rnn_type_model = Sequential()
rnn_type_model.add(Embedding(input_dim=num_top_words,
                            output_dim=embedding_layer_size,
                            input_length=max_num_tokens,
                            name='embedding_layer'))
rnn_type_model.add(GRU(units=16, return_sequences=True))
rnn_type_model.add(GRU(units=4))
rnn_type_model.add(Dense(1, activation='sigmoid')) 
model_optimizer = Adam(lr=1e-3)
rnn_type_model.compile(loss='binary_crossentropy',
                        optimizer=model_optimizer,
                        metrics=['accuracy']) 

rnn_type_model.summary()

rnn_type_model.fit(input_train_pad, target_train,
                    validation_split=0.05, epochs=3, batch_size=64)

model_result = rnn_type_model.evaluate(input_test_pad, target_test)
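# Hypothetical follow-up (not part of the original snippet): report the test
# loss and accuracy returned by evaluate().
print("test loss: {:.4f}, test accuracy: {:.4f}".format(model_result[0], model_result[1]))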
Example #4
assert len(X_train['left']) == len(Y_train)

# --

# Model variables
gpus = 1
batch_size = 1024 * gpus
n_epoch = 50
n_hidden = 50

# Define the shared model
x = Sequential()
x.add(
    Embedding(len(embeddings),
              embedding_dim,
              weights=[embeddings],
              input_shape=(max_seq_length, ),
              trainable=False))
# CNN
# x.add(Conv1D(250, kernel_size=5, activation='relu'))
# x.add(GlobalMaxPool1D())
# x.add(Dense(250, activation='relu'))
# x.add(Dropout(0.3))
# x.add(Dense(1, activation='sigmoid'))
# LSTM
x.add(LSTM(n_hidden))

shared_model = x

# The visible layer
left_input = Input(shape=(max_seq_length, ), dtype='int32')
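# One common continuation of this Siamese setup (an assumption; the original
# snippet is truncated here): add a matching right input and run both sides
# through the shared model before scoring their similarity.
right_input = Input(shape=(max_seq_length, ), dtype='int32')
left_encoded = shared_model(left_input)
right_encoded = shared_model(right_input)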
Example #5
from keras.losses import categorical_crossentropy
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
import matplotlib.pyplot as plt
from keras.layers import Input, Dense, Embedding, Flatten
from keras.layers import SpatialDropout1D
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential
import keras

model2 = Sequential()

# Input / Embedding
model2.add(Embedding(max_features, 150, input_length=max_words))

# CNN
model2.add(SpatialDropout1D(0.2))

model2.add(Conv1D(64, kernel_size=3, padding='same', activation='relu'))
model2.add(MaxPooling1D(pool_size=2))

model2.add(BatchNormalization())
model2.add(Conv1D(32, kernel_size=3, padding='same', activation='relu'))
model2.add(MaxPooling1D(pool_size=2))

model2.add(Flatten())

# Output layer
model2.add(Dense(5, activation='softmax'))
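# The snippet stops before compilation; a minimal sketch of one reasonable way
# to finish it (the optimizer choice is an assumption, not from the original):
model2.compile(loss=categorical_crossentropy,
               optimizer=Adam(),
               metrics=['accuracy'])
model2.summary()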
Example #6
def NFFM(
    linear_feature_columns,
    dnn_feature_columns,
    embedding_size=4,
    dnn_hidden_units=(128, 128),
    l2_reg_embedding=1e-5,
    l2_reg_linear=1e-5,
    l2_reg_dnn=0,
    dnn_dropout=0,
    init_std=0.0001,
    seed=1024,
    use_bn=True,
    reduce_sum=False,
    task='binary',
):
    """Instantiates the Operation-aware Neural Networks  architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param embedding_size: positive integer, sparse feature embedding size
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the deep net
    :param l2_reg_embedding: float, L2 regularizer strength applied to embedding vectors
    :param l2_reg_linear: float, L2 regularizer strength applied to the linear part
    :param l2_reg_dnn: float, L2 regularizer strength applied to the DNN
    :param init_std: float, standard deviation used to initialize the embedding vectors
    :param seed: integer, to use as random seed
    :param dnn_dropout: float in [0,1), the probability of dropping a given DNN coordinate
    :param use_bn: bool, whether to apply BatchNormalization after the FFM output
    :param reduce_sum: bool, whether to apply reduce_sum on the cross vectors
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """

    features = build_input_features(linear_feature_columns +
                                    dnn_feature_columns)

    inputs_list = list(features.values())

    linear_logit = get_linear_logit(features,
                                    linear_feature_columns,
                                    l2_reg=l2_reg_linear,
                                    init_std=init_std,
                                    seed=seed,
                                    prefix='linear')

    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat),
               dnn_feature_columns)) if dnn_feature_columns else []
    varlen_sparse_feature_columns = list(
        filter(lambda x: isinstance(x, VarLenSparseFeat),
               dnn_feature_columns)) if dnn_feature_columns else []

    sparse_embedding = {
        fc_j.embedding_name: {
            fc_i.embedding_name:
            Embedding(fc_j.dimension,
                      embedding_size,
                      embeddings_initializer=RandomNormal(mean=0.0,
                                                          stddev=0.0001,
                                                          seed=seed),
                      embeddings_regularizer=l2(l2_reg_embedding),
                      mask_zero=isinstance(fc_j, VarLenSparseFeat),
                      name='sparse_emb_' + str(fc_j.embedding_name) + '_' +
                      fc_i.embedding_name)
            for fc_i in sparse_feature_columns + varlen_sparse_feature_columns
        }
        for fc_j in sparse_feature_columns + varlen_sparse_feature_columns
    }

    dense_value_list = get_dense_input(features, dnn_feature_columns)

    embed_list = []
    for fc_i, fc_j in itertools.combinations(
            sparse_feature_columns + varlen_sparse_feature_columns, 2):
        i_input = features[fc_i.name]
        if fc_i.use_hash:
            i_input = Hash(fc_i.dimension)(i_input)
        j_input = features[fc_j.name]
        if fc_j.use_hash:
            j_input = Hash(fc_j.dimension)(j_input)

        fc_i_embedding = feature_embedding(fc_i, fc_j, sparse_embedding,
                                           i_input)
        fc_j_embedding = feature_embedding(fc_j, fc_i, sparse_embedding,
                                           j_input)

        element_wise_prod = multiply([fc_i_embedding, fc_j_embedding])
        if reduce_sum:
            element_wise_prod = Lambda(lambda element_wise_prod: K.sum(
                element_wise_prod, axis=-1))(element_wise_prod)
        embed_list.append(element_wise_prod)

    ffm_out = tf.keras.layers.Flatten()(concat_fun(embed_list, axis=1))
    if use_bn:
        ffm_out = tf.keras.layers.BatchNormalization()(ffm_out)
    dnn_input = combined_dnn_input([ffm_out], dense_value_list)
    dnn_out = DNN(dnn_hidden_units,
                  l2_reg=l2_reg_dnn,
                  dropout_rate=dnn_dropout)(dnn_input)
    dnn_logit = Dense(1, use_bias=False)(dnn_out)

    if len(linear_feature_columns) > 0 and len(dnn_feature_columns) > 0:
        final_logit = add([dnn_logit, linear_logit])
    elif len(linear_feature_columns) > 0:
        final_logit = linear_logit
    elif len(dnn_feature_columns) > 0:
        final_logit = dnn_logit
    else:
        raise NotImplementedError

    output = PredictionLayer(task)(final_logit)

    model = Model(inputs=inputs_list, outputs=output)
    return model
Example #7
def DSIN(
    feature_dim_dict,
    sess_feature_list,
    embedding_size=8,
    sess_max_count=5,
    sess_len_max=10,
    att_embedding_size=1,
    att_head_num=8,
    dnn_hidden_units=(200, 80),
    dnn_activation='sigmoid',
    l2_reg_dnn=0,
    l2_reg_embedding=1e-6,
    task='binary',
    dnn_dropout=0,
    init_std=0.0001,
    seed=1024,
    encoding='bias',
):

    check_feature_config_dict(feature_dim_dict)

    print(
        'sess_count',
        sess_max_count,
        'encoding',
        encoding,
    )

    sparse_input, dense_input, user_behavior_input_dict, _, user_sess_length = get_input(
        feature_dim_dict, sess_feature_list, sess_max_count, sess_len_max)

    sparse_embedding_dict = {
        feat.name:
        Embedding(feat.dimension,
                  embedding_size,
                  embeddings_initializer=RandomNormal(mean=0.0,
                                                      stddev=init_std,
                                                      seed=seed),
                  embeddings_regularizer=l2(l2_reg_embedding),
                  name='sparse_emb_' + str(i) + '-' + feat.name,
                  mask_zero=(feat.name in sess_feature_list))
        for i, feat in enumerate(feature_dim_dict["sparse"])
    }

    query_emb_list = get_embedding_vec_list(sparse_embedding_dict,
                                            sparse_input,
                                            feature_dim_dict["sparse"],
                                            sess_feature_list,
                                            sess_feature_list)

    query_emb = concat_fun(query_emb_list)

    deep_input_emb_list = get_embedding_vec_list(
        sparse_embedding_dict,
        sparse_input,
        feature_dim_dict["sparse"],
        mask_feat_list=sess_feature_list)
    deep_input_emb = concat_fun(deep_input_emb_list)
    deep_input_emb = Flatten()(NoMask()(deep_input_emb))

    be_flag = True if encoding == 'bias' else False
    tr_input = sess_interest_division(sparse_embedding_dict,
                                      user_behavior_input_dict,
                                      feature_dim_dict['sparse'],
                                      sess_feature_list,
                                      sess_max_count,
                                      bias_encoding=be_flag)

    Self_Attention = Transformer(att_embedding_size,
                                 att_head_num,
                                 dropout_rate=0,
                                 use_layer_norm=False,
                                 use_positional_encoding=(not be_flag),
                                 seed=seed,
                                 supports_masking=True,
                                 blinding=True)
    sess_fea = sess_interest_extractor(tr_input, sess_max_count,
                                       Self_Attention)

    interest_attention_layer = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16),
        weight_normalization=True,
        supports_masking=False)([query_emb, sess_fea, user_sess_length])

    lstm_outputs = BiLSTM(
        len(sess_feature_list) * embedding_size,
        layers=2,
        res_layers=0,
        dropout_rate=0.2,
    )(sess_fea)
    lstm_attention_layer = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16),
        weight_normalization=True)([query_emb, lstm_outputs, user_sess_length])

    deep_input_emb = Concatenate()([
        deep_input_emb,
        Flatten()(interest_attention_layer),
        Flatten()(lstm_attention_layer)
    ])
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()([deep_input_emb] +
                                       list(dense_input.values()))

    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                 False, seed)(deep_input_emb)
    output = Dense(1, use_bias=False, activation=None)(output)
    output = PredictionLayer(task)(output)

    sess_input_list = []
    #sess_input_length_list = []
    for i in range(sess_max_count):
        sess_name = "sess_" + str(i)
        sess_input_list.extend(
            get_inputs_list([user_behavior_input_dict[sess_name]]))
        #sess_input_length_list.append(user_behavior_length_dict[sess_name])

    model_input_list = get_inputs_list(
        [sparse_input, dense_input]) + sess_input_list + [user_sess_length]

    model = Model(inputs=model_input_list, outputs=output)

    return model
Example #8
# Inspect the data structure: tfidf[i][j] is the tf-idf weight of word j in text i
weight = tfidf.toarray()
print(weight)

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(weight, Y)
print(X_train.shape, X_test.shape)
print(len(y_train), len(y_test))
#(15, 117) (6, 117) 15 6

#-------------------------------- Model building and training -------------------------------
model = Sequential()

# Build the Embedding layer; 128 is the dimensionality of the embedding vectors
model.add(Embedding(max_features, 128))

# Build the LSTM layer
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))

# Build the fully connected layer
# Note: the LSTM layer above only returns the output of the last timestep; to get
# the output at every timestep, set return_sequences=True
model.add(Dense(units=1, activation='sigmoid'))

# Visualize the model
model.summary()

# Compile the neural network
model.compile(
    optimizer='rmsprop',  # RMSprop optimizer
    loss='binary_crossentropy',  # binary cross-entropy loss
Example #9
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Project      : tql-Python.
# @File         : mid_layer
# @Time         : 2019-07-12 16:33
# @Author       : yuanjie
# @Email        : [email protected]
# @Software     : PyCharm
# @Description  :
"""
https://www.tensorflow.org/beta/tutorials/keras/feature_columns
"""

from tensorflow.python.keras.layers import Input, Embedding, Reshape, Activation
from tensorflow.python.keras.models import Model

input_model = Input(shape=(1, ))
output_store = Embedding(1115, 10, name='store_embedding')(input_model)
output_store = Reshape(target_shape=(10, ))(output_store)

output_model = Activation('sigmoid')(output_store)
model = Model(inputs=input_model, outputs=output_model)
model.summary()

embed = Model(inputs=model.input, outputs=model.get_layer(index=1).output)
# Use this model's predictions as the output
embed.predict([[1]])
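# Equivalent lookup by layer name instead of index (standard Keras API, shown
# here only as an illustration of the same idea):
embed_by_name = Model(inputs=model.input,
                      outputs=model.get_layer('store_embedding').output)
embed_by_name.predict([[1]])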
Example #10
train_seq_mat = sequence.pad_sequences(train_seq, maxlen=max_len)
val_seq_mat = sequence.pad_sequences(val_seq, maxlen=max_len)
test_seq_mat = sequence.pad_sequences(test_seq, maxlen=max_len)

print(train_seq_mat.shape)
print(val_seq_mat.shape)
print(test_seq_mat.shape)

# In[ ]:

# In[17]:

## Define the LSTM model
inputs = Input(name='inputs', shape=[max_len])
## Embedding(vocabulary size, embedding dimension)
layer = Embedding(max_words + 1, 128, input_length=max_len)(inputs)
layer = LSTM(128)(layer)
layer = Dense(128, activation="relu", name="FC1")(layer)
layer = Dropout(0.5)(layer)
layer = Dense(3, activation="softmax", name="FC2")(layer)
model = Model(inputs=inputs, outputs=layer)
model.summary()
model.compile(loss="categorical_crossentropy",
              optimizer=RMSprop(),
              metrics=["accuracy"])

# In[15]:

# model = tf.keras.Sequential([
#    tf.keras.layers.Embedding(max_words+1,128,input_length=max_len),
#    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
Example #11
def seq2seq_architecture(latent_size, vocabulary_size, article_max_len,
                         embedding_matrix, batch_size, epochs, train_article,
                         train_summary, train_target):
    encoder_inputs = Input(shape=(article_max_len, ), name='Encoder-Input')
    encoder_embeddings = Embedding(
        vocabulary_size + 1,
        300,
        weights=[embedding_matrix],
        trainable=False,
        mask_zero=False,
        name='Encoder-Word-Embedding')(encoder_inputs)
    encoder_embeddings = BatchNormalization(
        name='Encoder-Batch-Normalization')(encoder_embeddings)
    encoder_conv = Conv1D(filters=4,
                          kernel_size=8,
                          padding='same',
                          activation='relu')(encoder_embeddings)
    encoder_drop = Dropout(0.25)(encoder_conv)
    encoder_pool = MaxPooling1D(pool_size=1)(encoder_drop)
    encoder_flatten = Flatten()(encoder_pool)
    encoder_model = Model(inputs=encoder_inputs,
                          outputs=encoder_flatten,
                          name='Encoder-Model')
    encoder_outputs = encoder_model(encoder_inputs)

    decoder_inputs = Input(shape=(None, ), name='Decoder-Input')
    decoder_embeddings = Embedding(
        vocabulary_size + 1,
        300,
        weights=[embedding_matrix],
        trainable=False,
        mask_zero=False,
        name='Decoder-Word-Embedding')(decoder_inputs)
    decoder_embeddings = BatchNormalization(
        name='Decoder-Batch-Normalization-1')(decoder_embeddings)
    decoder_conv = Conv1D(filters=32,
                          kernel_size=4,
                          padding='same',
                          activation='relu',
                          name='Decoder-Conv1D')(decoder_embeddings)
    decoder_drop = Dropout(0.25, name='Decoder-Conv1D-Dropout')(decoder_conv)
    decoder_pool = MaxPooling1D(pool_size=1, name='Decoder-MaxPool1D')(
        decoder_drop)  # GlobalMaxPool1D()

    decoder_gru = GRU(latent_size,
                      return_state=True,
                      return_sequences=True,
                      name='Decoder-GRU')
    decoder_gru_outputs, _ = decoder_gru(decoder_pool,
                                         initial_state=encoder_outputs)
    decoder_outputs = BatchNormalization(
        name='Decoder-Batch-Normalization-2')(decoder_gru_outputs)
    decoder_outputs = Dense(vocabulary_size + 1,
                            activation='softmax',
                            name='Final-Output-Dense')(decoder_outputs)

    seq2seq_model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    seq2seq_model.compile(optimizer="adam",
                          loss='sparse_categorical_crossentropy',
                          metrics=['sparse_categorical_accuracy'])
    seq2seq_model.summary()

    classes = [item for sublist in train_summary.tolist() for item in sublist]
    class_weights = class_weight.compute_class_weight('balanced',
                                                      np.unique(classes),
                                                      classes)

    e_stopping = EarlyStopping(monitor='val_loss',
                               patience=4,
                               verbose=1,
                               mode='min',
                               restore_best_weights=True)
    history = seq2seq_model.fit(x=[train_article, train_summary],
                                y=np.expand_dims(train_target, -1),
                                batch_size=batch_size,
                                epochs=epochs,
                                validation_split=0.1,
                                callbacks=[e_stopping],
                                class_weight=class_weights)

    f = open("data/models/convgru_results.txt", "w", encoding="utf-8")
    f.write("ConvGRU \n layers: 1 \n latent size: " + str(latent_size) +
            "\n vocab size: " + str(vocabulary_size) + "\n")
    f.close()

    history_dict = history.history
    plot_loss(history_dict)

    # inference
    encoder_model = seq2seq_model.get_layer('Encoder-Model')

    decoder_inputs = seq2seq_model.get_layer('Decoder-Input').input
    decoder_embeddings = seq2seq_model.get_layer('Decoder-Word-Embedding')(
        decoder_inputs)
    decoder_embeddings = seq2seq_model.get_layer(
        'Decoder-Batch-Normalization-1')(decoder_embeddings)
    decoder_conv = seq2seq_model.get_layer('Decoder-Conv1D')(
        decoder_embeddings)
    decoder_drop = seq2seq_model.get_layer('Decoder-Conv1D-Dropout')(
        decoder_conv)
    decoder_pool = seq2seq_model.get_layer('Decoder-MaxPool1D')(decoder_drop)

    gru_inference_state_input = Input(shape=(latent_size, ),
                                      name='Hidden-State-Input')
    gru_out, gru_state_out = seq2seq_model.get_layer('Decoder-GRU')(
        [decoder_pool, gru_inference_state_input])
    decoder_outputs = seq2seq_model.get_layer('Decoder-Batch-Normalization-2')(
        gru_out)
    dense_out = seq2seq_model.get_layer('Final-Output-Dense')(decoder_outputs)
    decoder_model = Model([decoder_inputs, gru_inference_state_input],
                          [dense_out, gru_state_out])

    return encoder_model, decoder_model
Example #12
for v in vals.keys():
    vals[v] = vals[v] / len(y)
print(vals)
kfold = StratifiedKFold(n_splits=5, shuffle=True)
cvscores_avg = []
cvscores_against = []
cvscores_favor = []
tri = []
for train, test in kfold.split(x_train_pad, y):
    x_train = x_train_pad[train]
    y_train = y_train_onehot[train]
    x_test = x_train_pad[test]
    y_test = y_train_onehot[test]
    model = Sequential()
    model.add(Embedding(input_dim=num_words,
                        output_dim=embedding_size,
                        input_length=max_tokens,
                        name='layer_embedding'))
    # model.add(Flatten())
    # model.add(Dense(25, activation='elu'))
    # model.add(Dropout(0.5))
    model.add(GRU(units=16))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(lr=1e-3),
                  metrics=['acc'])

    model.fit(x_train, y_train,
              epochs=20, batch_size=128, verbose=0)
    y_pred = model.predict(x_test).argmax(axis=-1)
    cm = classification_report(y_test.argmax(axis=-1), y_pred)
    dl = f1_score(y_true=y_test.argmax(axis=-1), y_pred=y_pred, average=None)
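    # Hypothetical bookkeeping (the original loop body is truncated here): the
    # lists created above presumably collect per-fold scores, e.g.
    cvscores_avg.append(dl.mean())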
Example #13
                     activation='tanh',
                     dropout=0.2,
                     recurrent_dropout=0.2)
encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1)

#encoder lstm 3
encoder_lstm3 = LSTM(latent_dim,
                     return_state=True,
                     return_sequences=True,
                     activation='tanh',
                     dropout=0.2,
                     recurrent_dropout=0.2)
encoder_outputs, state_h, state_c = encoder_lstm3(encoder_output2)

dec_input = Input(shape=(None, ))
dec_emb_layer = Embedding(output_size + 1, latent_dim, trainable=True)
dec_emb = dec_emb_layer(dec_input)

decoder_lstm = LSTM(latent_dim,
                    return_sequences=True,
                    return_state=True,
                    activation='tanh',
                    dropout=0.2,
                    recurrent_dropout=0.2)
decoder_outputs, decoder_fwd_state, decoder_back_state = decoder_lstm(
    dec_emb, initial_state=[state_h, state_c])

# Attention layer
attn_layer = AttentionLayer(name='attention_layer')
attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])
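# One common way such an attention block continues (an assumption; the original
# snippet ends here): concatenate the attention context with the decoder
# outputs and project onto the target vocabulary. Assumes Concatenate,
# TimeDistributed and Dense are imported from keras.layers.
decoder_concat_input = Concatenate(axis=-1)([decoder_outputs, attn_out])
decoder_dense = TimeDistributed(Dense(output_size + 1, activation='softmax'))
decoder_final_outputs = decoder_dense(decoder_concat_input)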
Example #14
def sepCNN(blocks,
           filters,
           kernel_size,
           embedding_dim,
           dropout_rate,
           pool_size,
           input_shape,
           num_features,
           pretrained_embedding=False,
           embedding_trainable=False,
           embedding_weights=None,
           learning_rate=1e-3):
    """ Creates an instance of a separable CNN model.

    Parameters
    ----------
    blocks: int
        Number of pairs of sepCNN and pooling blocks in the model. One block
        contains [DropOut, Conv1D, Conv1D, MaxPool]
    filters: int
        Output dimension of the layers.
    kernel_size: int
        Length of the convolution window.
    embedding_dim: int
        Dimension of the embedding vectors.
    dropout_rate: float
        Percentage of input to drop at Dropout layers.
    pool_size: int
        Factor by which to downscale input at MaxPooling layer.
    input_shape: tuple
        Shape of input to the model.
    num_features: int
        Number of words (embedding input dimension).
    pretrained_embedding: bool
        True if pre-trained embedding is on.
    embedding_trainable: bool
        True if embedding layer is trainable.
    embedding_weights: np.ndarray
        Matrix of pre-trained embedding coefficients.
    learning_rate: float
        Learning rate parameter for the model

    Returns
    -------
    model:
        A compiled sepCNN keras model instance.
    """

    model = Sequential()

    if pretrained_embedding:
        model.add(
            Embedding(num_features,
                      embedding_dim,
                      input_length=input_shape[0],
                      embeddings_initializer=Constant(embedding_weights),
                      trainable=embedding_trainable))
    else:
        model.add(
            Embedding(num_features, embedding_dim,
                      input_length=input_shape[0]))

    for _ in range(blocks - 1):
        model.add(Dropout(dropout_rate))
        model.add(
            SeparableConv1D(filters,
                            kernel_size,
                            activation='relu',
                            padding='same'))
        model.add(
            SeparableConv1D(filters,
                            kernel_size,
                            activation='relu',
                            padding='same'))
        model.add(MaxPooling1D(pool_size))

    model.add(
        SeparableConv1D(filters * 2,
                        kernel_size,
                        activation='relu',
                        padding='same'))
    model.add(
        SeparableConv1D(filters * 2,
                        kernel_size,
                        activation='relu',
                        padding='same'))
    model.add(GlobalAveragePooling1D())
    model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation='sigmoid'))

    optimizer = Adam(lr=learning_rate)
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['acc'])

    return model
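# Usage sketch with hypothetical hyperparameters and data (x_train / y_train
# are placeholders, not from the original):
model = sepCNN(blocks=2, filters=64, kernel_size=3, embedding_dim=200,
               dropout_rate=0.2, pool_size=3, input_shape=(500,),
               num_features=20000)
model.fit(x_train, y_train, epochs=10, batch_size=128, validation_split=0.1)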
Example #15
    def train(self, texts: List[str], target: List[int]) -> None:

        from tensorflow.python.keras.models import Model  #type: ignore
        from tensorflow.python.keras.layers import Input, Embedding, GRU, Dense, Bidirectional, GlobalMaxPool1D, concatenate  #type: ignore
        from tensorflow.keras.optimizers import Adam  #type: ignore
        from tensorflow.keras.callbacks import History  #type: ignore

        if self.downsampling:
            texts, target = downsample(texts, target, self.downsampling_ratio)

        if self.verbose:
            print('1. Vectorizing texts')

        NUMBER_OF_FEATURES: int = 20000
        self.tokenizer = text.Tokenizer(num_words=NUMBER_OF_FEATURES)
        self.tokenizer.fit_on_texts(texts)
        vocabulary: Dict[str, int] = self.tokenizer.word_index

        if self._max_sequence_length == 0:
            self._max_sequence_length = len(max(texts, key=len))

        vectorized_texts: array = self.vectorize_texts(texts)

        if self.include_casing_information:
            casing_information: array = self.texts_to_casing_information(texts)

        if self.embedding_location == '':
            if self.verbose:
                print('2. Skip (no embeddings)')
                print('3. Skip (no embeddings)')
        else:
            if self.verbose:
                print('2. Loading word embeddings')

            embedding_dictionary: Dict[
                str, List[float]] = load_embedding_dictionary(
                    self.embedding_location)
            nr_of_embedding_features: int = len(
                list(embedding_dictionary.values())
                [1])  # Check how many values we have per word (the embedding dimensionality)

            if self.verbose:
                print('3. Creating embedding matrix')

            embedding_matrix: array = create_embedding_matrix_for_vocabulary(
                embedding_dictionary, vocabulary)

        if self.verbose:
            print('4. Building up model')

        #Define a simple BiGru model with a pretrained embedding layer
        word_input: Input = Input(shape=(self._max_sequence_length, ))

        if self.embedding_location == '':
            #Add an empty embedding layer if we have no pretrained embeddings
            EMPTY_EMBEDDING_LAYER_SIZE: int = 300
            layers = Embedding(
                len(vocabulary) + 1, EMPTY_EMBEDDING_LAYER_SIZE)(word_input)

        else:
            layers = Embedding(input_dim=len(vocabulary) + 1,
                               output_dim=nr_of_embedding_features,
                               input_length=vectorized_texts.shape[1],
                               weights=[embedding_matrix],
                               trainable=False)(word_input)

        #Add a separate 'entrance' for the casing information
        if self.include_casing_information:
            word_model: Model = Model(inputs=word_input, outputs=layers)

            casing_input: Input = Input(shape=(self._max_sequence_length, 1))

            casing_model: Model = Model(inputs=casing_input,
                                        outputs=casing_input)
            layers = concatenate([word_model.output, casing_model.output])

        if self.bidirectional:
            layers = Bidirectional(
                GRU(16, activation='tanh', return_sequences=True))(layers)
            layers = Bidirectional(
                GRU(16, activation='tanh', return_sequences=True))(layers)
        else:
            layers = GRU(16, activation='tanh', return_sequences=True)(layers)
            layers = GRU(16, activation='tanh', return_sequences=True)(layers)

        layers = GlobalMaxPool1D()(layers)

        layers = Dense(256)(layers)
        layers = Dense(256)(layers)

        layers = Dense(1, activation='sigmoid')(layers)

        if self.include_casing_information:
            model: Model = Model([word_model.input, casing_model.input],
                                 layers)
        else:
            model: Model = Model(word_input, layers)

        #Compile the model
        optimizer: Adam = Adam(lr=self.learning_rate)
        model.compile(optimizer=optimizer,
                      loss='binary_crossentropy',
                      metrics=['acc'])

        if self.verbose:
            print('5. training the model')

        if self.include_casing_information:

            input = [vectorized_texts, casing_information]

        else:

            input = vectorized_texts

        history: History = model.fit(
            input,
            target,
            epochs=self.learning_epochs,
            #validation_data=(test_vectors, test_target),
            verbose=1,  # Logs once per epoch.
            batch_size=self.learning_batch_size)

        self.model = model
Example #16
import numpy
from tensorflow.python.keras.datasets import imdb
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.layers import LSTM
from tensorflow.python.keras.layers import Conv1D
from tensorflow.python.keras.layers import MaxPooling1D
from tensorflow.python.keras.layers import Embedding
from tensorflow.python.keras.preprocessing import sequence

numpy.random.seed(7)
top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)


max_review_length = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)

embedding_vector_length = 32
model = Sequential()
model.add(Embedding(top_words, embedding_vector_length, input_length=max_review_length))
model.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train, y_train, epochs=10, batch_size=64)
# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))
Example #17
    def train(self, texts: List[str], target: List[int]) -> None:

        from tensorflow.python.keras.models import Sequential  #type: ignore
        from tensorflow.python.keras.layers import Embedding, Dense, LSTM, GlobalMaxPool1D  #type: ignore
        from tensorflow.keras.optimizers import Adam  #type: ignore
        from tensorflow.keras.callbacks import History  #type: ignore

        if self.downsampling:
            texts, target = downsample(texts, target, self.downsampling_ratio)

        if self.verbose:
            print('1. Vectorizing texts')

        NUMBER_OF_FEATURES: int = 20000
        self.tokenizer = text.Tokenizer(num_words=NUMBER_OF_FEATURES)
        self.tokenizer.fit_on_texts(texts)
        vocabulary: Dict[str, int] = self.tokenizer.word_index

        if self._max_sequence_length == 0:
            self._max_sequence_length = len(max(texts, key=len))

        vectorized_texts: array = self.vectorize_texts(texts)

        if self.embedding_location == '':
            if self.verbose:
                print('2. Skip (no embeddings)')
                print('3. Skip (no embeddings)')
        else:
            if self.verbose:
                print('2. Loading word embeddings')

            embedding_dictionary: Dict[
                str, List[float]] = load_embedding_dictionary(
                    self.embedding_location)
            nr_of_embedding_features: int = len(
                list(embedding_dictionary.values())
                [1])  # Check how many values we have per word (the embedding dimensionality)

            if self.verbose:
                print('3. Creating embedding matrix')

            embedding_matrix: array = create_embedding_matrix_for_vocabulary(
                embedding_dictionary, vocabulary)

        if self.verbose:
            print('4. Building up model')

        #Define a simple LSTM model with a pretrained embedding layer
        model: Sequential = Sequential()

        if self.embedding_location == '':
            #Add an empty embedding layer if we have no pretrained embeddings
            EMPTY_EMBEDDING_LAYER_SIZE: int = 300
            model.add(
                Embedding(len(vocabulary) + 1, EMPTY_EMBEDDING_LAYER_SIZE))

        else:
            model.add(
                Embedding(input_dim=len(vocabulary) + 1,
                          output_dim=nr_of_embedding_features,
                          input_length=vectorized_texts.shape[1],
                          weights=[embedding_matrix],
                          trainable=False))

        model.add(LSTM(16, return_sequences=True))
        model.add(LSTM(16, return_sequences=True))
        model.add(LSTM(16, return_sequences=True))
        model.add(GlobalMaxPool1D())

        model.add(Dense(256))
        model.add(Dense(256))

        model.add(Dense(1, activation='sigmoid'))

        #Compile the model
        optimizer: Adam = Adam(lr=self.learning_rate)
        model.compile(optimizer=optimizer,
                      loss='binary_crossentropy',
                      metrics=['acc'])

        if self.verbose:
            print('5. training the model')

        history: History = model.fit(
            vectorized_texts,
            target,
            epochs=self.learning_epochs,
            #validation_data=(test_vectors, test_target),
            verbose=1,  # Logs once per epoch.
            batch_size=self.learning_batch_size)

        self.model = model
Example #18
def seq2seq_architecture(latent_size, vocabulary_size, max_len_article,
                         embedding_matrix, batch_size, epochs, train_article,
                         train_summary, train_target):
    # encoder
    encoder_inputs = Input(shape=(None, ), name='Encoder-Input')
    encoder_embeddings = Embedding(vocabulary_size,
                                   300,
                                   weights=[embedding_matrix],
                                   trainable=False,
                                   mask_zero=True,
                                   name='Encoder-Word-Embedding')
    norm_encoder_embeddings = BatchNormalization(
        name='Encoder-Batch-Normalization')
    encoder_lstm_1 = LSTM(latent_size,
                          name='Encoder-LSTM-1',
                          return_sequences=True,
                          dropout=0.2,
                          recurrent_dropout=0.2)

    e = encoder_embeddings(encoder_inputs)
    e = norm_encoder_embeddings(e)
    encoder_outputs = encoder_lstm_1(e)
    encoder_last = encoder_outputs[:, -1, :]

    # decoder
    decoder_inputs = Input(shape=(None, ), name='Decoder-Input')
    decoder_embeddings = Embedding(vocabulary_size,
                                   300,
                                   weights=[embedding_matrix],
                                   trainable=False,
                                   mask_zero=True,
                                   name='Decoder-Word-Embedding')
    norm_decoder_embeddings = BatchNormalization(
        name='Decoder-Batch-Normalization-1')
    decoder_lstm_1 = LSTM(latent_size,
                          name='Decoder-LSTM-1',
                          return_sequences=True,
                          dropout=0.2,
                          recurrent_dropout=0.2)
    norm_decoder = BatchNormalization(name='Decoder-Batch-Normalization-2')
    attention_activation = Activation('softmax', name='Attention')
    dense_intermediate = TimeDistributed(
        Dense(64, activation="tanh", name="Intermediate-Output-Dense"))
    dense_final = TimeDistributed(
        Dense(vocabulary_size, activation="softmax",
              name="Final-Output-Dense"))

    d = decoder_embeddings(decoder_inputs)
    d = norm_decoder_embeddings(d)
    decoder_outputs = decoder_lstm_1(
        d, initial_state=[encoder_last, encoder_last])
    decoder_outputs = norm_decoder(decoder_outputs)
    attention = dot([decoder_outputs, encoder_outputs], axes=[2, 2])
    attention = attention_activation(attention)
    context = dot([attention, encoder_outputs], axes=[2, 1])
    decoder_combined_context = concatenate([context, decoder_outputs])
    outputs = dense_intermediate(decoder_combined_context)
    decoder_last = dense_final(outputs)

    seq2seq_model = Model(inputs=[encoder_inputs, decoder_inputs],
                          outputs=decoder_last)
    seq2seq_model.compile(optimizer="rmsprop",
                          loss='sparse_categorical_crossentropy',
                          metrics=['sparse_categorical_accuracy'])
    seq2seq_model.summary()

    classes = [item for sublist in train_summary.tolist() for item in sublist]
    class_weights = class_weight.compute_class_weight('balanced',
                                                      np.unique(classes),
                                                      classes)

    e_stopping = EarlyStopping(monitor='val_loss',
                               patience=4,
                               verbose=1,
                               mode='min',
                               restore_best_weights=True)
    history = seq2seq_model.fit(x=[train_article, train_summary],
                                y=np.expand_dims(train_target, -1),
                                batch_size=batch_size,
                                epochs=epochs,
                                validation_split=0.1,
                                class_weight=class_weights)

    f = open("data/models/results.txt", "w", encoding="utf-8")
    f.write("Attention LSTM \n layers: 1 \n latent size: " + str(latent_size) +
            "\n vocab size: " + str(vocabulary_size) + "\n")
    f.close()

    history_dict = history.history
    plot_loss(history_dict)

    return seq2seq_model
Example #19
    w2v_model = gensim.models.KeyedVectors.load_word2vec_format(VECTOR_DIR,
                                                                binary=True)
    embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))
    not_in_model = 0
    in_model = 0
    for word, i in word_index.items():
        if unicode(word) in w2v_model:
            in_model += 1
            embedding_matrix[i] = np.asarray(w2v_model[unicode(word)],
                                             dtype='float32')
        else:
            not_in_model += 1
    print(str(not_in_model) + ' words not in w2v model')
    embedding_layer = Embedding(len(word_index) + 1,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=False)

    print('(5) training model...')
    model = Sequential()
    model.add(embedding_layer)
    model.add(Dropout(0.2))
    model.add(Conv1D(250, 3, padding='valid', activation='relu', strides=1))
    model.add(MaxPooling1D(3))
    model.add(Flatten())
    model.add(Dense(EMBEDDING_DIM, activation='relu'))
    model.add(Dense(labels.shape[1], activation='softmax'))
    model.summary()
    plot_model(model,
               to_file=os.path.join(ckpt_path, 'word_vector_cnn_model.png'),
Example #20
batch = next(generator)          # Test the data generator by creating one batch of data

num_captions_train = [len(captions) for captions in captions_train]    # Number of captions for each image in the training set.
total_num_captions_train = np.sum(num_captions_train)     # Total number of captions in the training set.
steps_per_epoch = int(total_num_captions_train / batch_size)     # Total number of batches per epoch

state_size = 512   # The decoder consists of 3 GRUs with an internal state size of 512
embedding_size = 128    # The embedding layer has size 128
transfer_values_input = Input(shape=(transfer_values_size,),
                              name='transfer_values_input')         # Feed the transfer values into the decoder
decoder_transfer_map = Dense(state_size,
                             activation='tanh',
                             name='decoder_transfer_map')               # Use a fully connected layer to map the 4096-element vector to 512 elements; tanh keeps the output in [-1, 1]
decoder_input = Input(shape=(None, ), name='decoder_input')      # The token-sequence input to the decoder.
decoder_embedding = Embedding(input_dim=num_words,
                              output_dim=embedding_size,
                              name='decoder_embedding')          # Converts sequences of integer tokens into sequences of vectors.
# Create the decoder's 3 GRU layers
decoder_gru1 = GRU(state_size, name='decoder_gru1',
                   return_sequences=True)
decoder_gru2 = GRU(state_size, name='decoder_gru2',
                   return_sequences=True)
decoder_gru3 = GRU(state_size, name='decoder_gru3',
                   return_sequences=True)
decoder_dense = Dense(num_words,
                      activation='linear',
                      name='decoder_output')          # The GRU layers output a tensor of shape [batch_size, sequence_length, state_size], where each "word" is encoded as a vector of length state_size (512); this must be converted back into sequences of integer tokens that can be interpreted as words in the vocabulary.

# Connect all of the decoder's layers to the transfer-values input.
def connect_decoder(transfer_values):
    # Map the transfer values so the dimensionality matches the internal state of
    # the GRU layers; the mapped transfer values can then be used as the initial
    # state of the GRU layers.
Example #21
decoder_input_data = tokens_snippets[:, :-1]
print("decoder input shape: ", decoder_input_data.shape)

decoder_output_data = tokens_snippets[:, 1:]
print("decoder output shape: ", decoder_output_data.shape)

print(decoder_input_data[5])
print(decoder_output_data[5])

print(tokenizer_snippets.tokens_to_string(decoder_input_data[5]))
print(tokenizer_snippets.tokens_to_string(decoder_output_data[5]))

encoder_input = Input(shape=(None, ), name='encoder_input')
embedding_size = 128
encoder_embedding = Embedding(input_dim=num_words,
                              output_dim=embedding_size,
                              name='encoder_embedding')
state_size = 512

encoder_gru1 = GRU(state_size, name='encoder_gru1', return_sequences=True)
encoder_gru2 = GRU(state_size, name='encoder_gru2', return_sequences=True)
encoder_gru3 = GRU(state_size, name='encoder_gru3', return_sequences=False)


def connect_encoder():
    # Start the neural network with its input-layer.
    net = encoder_input

    # Connect the embedding-layer.
    net = encoder_embedding(net)
Example #22
def DSIN(
    dnn_feature_columns,
    sess_feature_list,
    sess_max_count=5,
    bias_encoding=False,
    att_embedding_size=1,
    att_head_num=8,
    dnn_hidden_units=(200, 80),
    dnn_activation='sigmoid',
    dnn_dropout=0,
    dnn_use_bn=False,
    l2_reg_dnn=0,
    l2_reg_embedding=1e-6,
    seed=1024,
    task='binary',
):
    """Instantiates the Deep Session Interest Network architecture.

    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
    :param sess_feature_list: list, to indicate the sequence sparse fields
    :param sess_max_count: positive int, to indicate the max number of sessions
    :param bias_encoding: bool, whether to use bias encoding or positional encoding
    :param att_embedding_size: positive int, the embedding size of each attention head
    :param att_head_num: positive int, the number of attention heads
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the deep net
    :param dnn_activation: Activation function to use in the deep net
    :param dnn_dropout: float in [0,1), the probability of dropping a given DNN coordinate.
    :param dnn_use_bn: bool, whether to use BatchNormalization before activation in the deep net
    :param l2_reg_dnn: float, L2 regularizer strength applied to the DNN
    :param l2_reg_embedding: float, L2 regularizer strength applied to embedding vectors
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.

    """

    hist_emb_size = sum(
        map(
            lambda fc: fc.embedding_dim,
            filter(lambda fc: fc.name in sess_feature_list,
                   dnn_feature_columns)))

    if att_embedding_size * att_head_num != hist_emb_size:
        raise ValueError(
            "hist_emb_size must equal att_embedding_size * att_head_num, got %d != %d * %d"
            % (hist_emb_size, att_embedding_size, att_head_num))

    features = build_input_features(dnn_feature_columns)

    sparse_feature_columns = list(
        filter(lambda x: isinstance(x, SparseFeat),
               dnn_feature_columns)) if dnn_feature_columns else []
    dense_feature_columns = list(
        filter(lambda x: isinstance(x, DenseFeat),
               dnn_feature_columns)) if dnn_feature_columns else []
    varlen_sparse_feature_columns = list(
        filter(lambda x: isinstance(x, VarLenSparseFeat),
               dnn_feature_columns)) if dnn_feature_columns else []

    sparse_varlen_feature_columns = []
    history_fc_names = list(map(lambda x: "sess" + x, sess_feature_list))
    for fc in varlen_sparse_feature_columns:
        feature_name = fc.name
        if feature_name in history_fc_names:
            continue
        else:
            sparse_varlen_feature_columns.append(fc)

    inputs_list = list(features.values())

    user_behavior_input_dict = {}
    for idx in range(sess_max_count):
        sess_input = OrderedDict()
        for i, feat in enumerate(sess_feature_list):
            sess_input[feat] = features["sess_" + str(idx) + "_" + feat]

        user_behavior_input_dict["sess_" + str(idx)] = sess_input

    user_sess_length = Input(shape=(1, ), name='sess_length')

    embedding_dict = {
        feat.embedding_name:
        Embedding(feat.vocabulary_size,
                  feat.embedding_dim,
                  embeddings_initializer=feat.embeddings_initializer,
                  embeddings_regularizer=l2(l2_reg_embedding),
                  name='sparse_emb_' + str(i) + '-' + feat.name,
                  mask_zero=(feat.name in sess_feature_list))
        for i, feat in enumerate(sparse_feature_columns)
    }

    query_emb_list = embedding_lookup(embedding_dict,
                                      features,
                                      sparse_feature_columns,
                                      sess_feature_list,
                                      sess_feature_list,
                                      to_list=True)
    dnn_input_emb_list = embedding_lookup(embedding_dict,
                                          features,
                                          sparse_feature_columns,
                                          mask_feat_list=sess_feature_list,
                                          to_list=True)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    query_emb = concat_func(query_emb_list, mask=True)

    dnn_input_emb = Flatten()(concat_func(dnn_input_emb_list))

    tr_input = sess_interest_division(embedding_dict,
                                      user_behavior_input_dict,
                                      sparse_feature_columns,
                                      sess_feature_list,
                                      sess_max_count,
                                      bias_encoding=bias_encoding)

    Self_Attention = Transformer(att_embedding_size,
                                 att_head_num,
                                 dropout_rate=0,
                                 use_layer_norm=False,
                                 use_positional_encoding=(not bias_encoding),
                                 seed=seed,
                                 supports_masking=True,
                                 blinding=True)
    sess_fea = sess_interest_extractor(tr_input, sess_max_count,
                                       Self_Attention)

    interest_attention_layer = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16),
        weight_normalization=True,
        supports_masking=False)([query_emb, sess_fea, user_sess_length])

    lstm_outputs = BiLSTM(
        hist_emb_size,
        layers=2,
        res_layers=0,
        dropout_rate=0.2,
    )(sess_fea)
    lstm_attention_layer = AttentionSequencePoolingLayer(
        att_hidden_units=(64, 16),
        weight_normalization=True)([query_emb, lstm_outputs, user_sess_length])

    dnn_input_emb = Concatenate()([
        dnn_input_emb,
        Flatten()(interest_attention_layer),
        Flatten()(lstm_attention_layer)
    ])

    dnn_input_emb = combined_dnn_input([dnn_input_emb], dense_value_list)
    output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
                 dnn_use_bn, seed)(dnn_input_emb)
    output = Dense(1, use_bias=False, activation=None)(output)
    output = PredictionLayer(task)(output)

    sess_input_list = []
    # sess_input_length_list = []
    for i in range(sess_max_count):
        sess_name = "sess_" + str(i)
        sess_input_list.extend(
            get_inputs_list([user_behavior_input_dict[sess_name]]))
        # sess_input_length_list.append(user_behavior_length_dict[sess_name])

    model = Model(inputs=inputs_list + [user_sess_length], outputs=output)

    return model
Example #23
def keras_estimator(model_dir,
                    config,
                    learning_rate,
                    filters=64,
                    dropout_rate=0.2,
                    embedding_dim=200,
                    kernel_size=3,
                    pool_size=3,
                    embedding_path=None,
                    word_index=None):
    # Create model instance.
    model = models.Sequential()
    num_features = min(len(word_index) + 1, TOP_K)

    # Add embedding layer. If a pre-trained embedding is used, initialize the
    # layer's weights from it and set trainable via the is_embedding_trainable
    # flag. The embedding converts each integer token into a richer
    # representation using a vector of floats.
    if embedding_path is not None:
        embedding_matrix = get_embedding_matrix(word_index, embedding_path,
                                                embedding_dim)
        is_embedding_trainable = True  # set to False to freeze embedding weights

        model.add(
            Embedding(input_dim=num_features,
                      output_dim=embedding_dim,
                      input_length=MAX_SEQUENCE_LENGTH,
                      weights=[embedding_matrix],
                      trainable=is_embedding_trainable))
    else:
        model.add(
            Embedding(input_dim=num_features,
                      output_dim=embedding_dim,
                      input_length=MAX_SEQUENCE_LENGTH))

    model.add(Dropout(rate=dropout_rate))
    model.add(
        Conv1D(filters=filters,
               kernel_size=kernel_size,
               activation='relu',
               bias_initializer='random_uniform',
               padding='same'))

    model.add(MaxPooling1D(pool_size=pool_size))
    model.add(
        Conv1D(filters=filters * 2,
               kernel_size=kernel_size,
               activation='relu',
               bias_initializer='random_uniform',
               padding='same'))
    model.add(
        GlobalAveragePooling1D()
    )  # pool size = size of the input, way of flattening the output of our CNN
    model.add(Dropout(rate=dropout_rate))
    model.add(Dense(len(CLASSES), activation='softmax'))

    # Compile model with learning parameters.
    optimizer = tf.keras.optimizers.Adam(lr=learning_rate)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])
    estimator = tf.keras.estimator.model_to_estimator(keras_model=model,
                                                      model_dir=model_dir,
                                                      config=config)

    return estimator
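A minimal usage sketch for the helper above. The RunConfig values and the toy word_index are illustrative assumptions, and TOP_K, MAX_SEQUENCE_LENGTH and CLASSES are module-level constants assumed to exist in the original project:

import tensorflow as tf

run_config = tf.estimator.RunConfig(save_checkpoints_steps=500)
toy_word_index = {'the': 1, 'movie': 2, 'was': 3, 'great': 4}  # hypothetical vocabulary
estimator = keras_estimator(model_dir='/tmp/text_cnn',
                            config=run_config,
                            learning_rate=1e-3,
                            word_index=toy_word_index)
# estimator.train(input_fn=train_input_fn, steps=1000)  # input_fn defined elsewhere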
Example #24
def create_embedding_dict(sparse_feature_columns,
                          varlen_sparse_feature_columns,
                          embedding_size,
                          init_std,
                          seed,
                          l2_reg,
                          prefix='sparse_',
                          seq_mask_zero=True):
    if embedding_size == 'auto':
        print("Notice:Do not use auto embedding in models other than DCN")
        sparse_embedding = {
            feat.embedding_name:
            Embedding(feat.dimension,
                      6 * int(pow(feat.dimension, 0.25)),
                      embeddings_initializer=RandomNormal(mean=0.0,
                                                          stddev=init_std,
                                                          seed=seed),
                      embeddings_regularizer=l2(l2_reg),
                      name=prefix + '_emb_' + feat.name)
            for feat in sparse_feature_columns
        }
    else:

        sparse_embedding = {
            feat.embedding_name:
            Embedding(feat.dimension,
                      embedding_size,
                      embeddings_initializer=RandomNormal(mean=0.0,
                                                          stddev=init_std,
                                                          seed=seed),
                      embeddings_regularizer=l2(l2_reg),
                      name=prefix + '_emb_' + feat.name)
            for feat in sparse_feature_columns
        }

    if varlen_sparse_feature_columns:
        for feat in varlen_sparse_feature_columns:
            # if feat.name not in sparse_embedding:
            if embedding_size == "auto":
                sparse_embedding[feat.embedding_name] = Embedding(
                    feat.dimension,
                    6 * int(pow(feat.dimension, 0.25)),
                    embeddings_initializer=RandomNormal(mean=0.0,
                                                        stddev=init_std,
                                                        seed=seed),
                    embeddings_regularizer=l2(l2_reg),
                    name=prefix + '_seq_emb_' + feat.name,
                    mask_zero=seq_mask_zero)

            else:
                sparse_embedding[feat.embedding_name] = Embedding(
                    feat.dimension,
                    embedding_size,
                    embeddings_initializer=RandomNormal(mean=0.0,
                                                        stddev=init_std,
                                                        seed=seed),
                    embeddings_regularizer=l2(l2_reg),
                    name=prefix + '_seq_emb_' + feat.name,
                    mask_zero=seq_mask_zero)
    return sparse_embedding
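A minimal sketch of calling the helper above. The SparseFeat namedtuple here is a stand-in with only the attributes the function reads (name, dimension, embedding_name); the real library defines its own feature-column classes:

from collections import namedtuple

from tensorflow.keras.layers import Input

SparseFeat = namedtuple('SparseFeat', ['name', 'dimension', 'embedding_name'])
user_feat = SparseFeat(name='user_id', dimension=1000, embedding_name='user_id')
item_feat = SparseFeat(name='item_id', dimension=5000, embedding_name='item_id')

embedding_dict = create_embedding_dict([user_feat, item_feat], [],
                                       embedding_size=8, init_std=0.0001,
                                       seed=1024, l2_reg=1e-5)
user_input = Input(shape=(1,), dtype='int32', name='user_id')
user_emb = embedding_dict['user_id'](user_input)  # shape: (batch, 1, 8)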
Example #25
    return x, y


train_x, train_y = preprocss(train_data)
test_x, test_y = preprocss(eval_data)

EPOCHS = 10
EMBED_DIM = 64
BiRNN_UNITS = 200

vocab_size = vocabulary_lookuper.size()
tag_size = tag_lookuper.size()

model = Sequential()
model.add(Embedding(vocab_size, EMBED_DIM, mask_zero=True))
model.add(Bidirectional(LSTM(BiRNN_UNITS // 2, return_sequences=True)))
model.add(CRF(tag_size))

# print model summary
model.summary()

callbacks_list = []

# tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=config['summary_log_dir'])
# callbacks_list.append(tensorboard_callback)
#
# checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
#     os.path.join(config['model_dir'], 'cp-{epoch:04d}.ckpt'),
#     load_weights_on_restart=True,
#     verbose=1
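A minimal sketch of how this BiLSTM-CRF is typically compiled and trained. The snippet above does not show where the CRF layer is imported from; the loss and metric below assume keras_contrib, so treat this as an assumption rather than the original project's exact setup:

# Assumes the CRF layer comes from keras_contrib (import not shown above).
from keras_contrib.losses import crf_loss
from keras_contrib.metrics import crf_viterbi_accuracy

model.compile(optimizer='adam', loss=crf_loss, metrics=[crf_viterbi_accuracy])
model.fit(train_x, train_y,
          epochs=EPOCHS,
          validation_data=(test_x, test_y),
          callbacks=callbacks_list)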
Example #26
def create_model(num_encoder_paragraph_tokens,
                 max_encoder_paragraph_seq_length, num_encoder_question_tokens,
                 max_encoder_question_seq_length, num_decoder_tokens):
    hidden_units = 128  # 256, 128, 64
    embed_hidden_units = 100

    context_inputs = Input(shape=(None, ), name='context_inputs')
    encoded_context = Embedding(input_dim=num_encoder_paragraph_tokens,
                                output_dim=embed_hidden_units,
                                input_length=max_encoder_paragraph_seq_length,
                                name='context_embedding')(context_inputs)
    encoded_context = Dropout(0.3)(encoded_context)

    question_inputs = Input(shape=(None, ), name='question_inputs')
    encoded_question = Embedding(input_dim=num_encoder_question_tokens,
                                 output_dim=embed_hidden_units,
                                 input_length=max_encoder_question_seq_length,
                                 name='question_embedding')(question_inputs)
    encoded_question = Dropout(0.3)(encoded_question)
    encoded_question = LSTM(units=embed_hidden_units,
                            name='question_lstm')(encoded_question)
    encoded_question = RepeatVector(max_encoder_paragraph_seq_length)(
        encoded_question)

    merged = add([encoded_context, encoded_question])

    encoder_lstm = LSTM(units=hidden_units,
                        return_state=True,
                        name='encoder_lstm')
    encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm(merged)
    encoder_states = [encoder_state_h, encoder_state_c]

    decoder_inputs = Input(shape=(None, num_decoder_tokens),
                           name='decoder_inputs')
    decoder_lstm = LSTM(units=hidden_units,
                        return_state=True,
                        return_sequences=True,
                        name='decoder_lstm')
    decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm(
        decoder_inputs, initial_state=encoder_states)
    decoder_dense = Dense(units=num_decoder_tokens,
                          activation='softmax',
                          name='decoder_dense')
    decoder_outputs = decoder_dense(decoder_outputs)

    model = Model([context_inputs, question_inputs, decoder_inputs],
                  decoder_outputs)
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    encoder_model = Model([context_inputs, question_inputs], encoder_states)

    decoder_state_inputs = [
        Input(shape=(hidden_units, )),
        Input(shape=(hidden_units, ))
    ]
    decoder_outputs, state_h, state_c = decoder_lstm(
        decoder_inputs, initial_state=decoder_state_inputs)

    decoder_states = [state_h, state_c]
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = Model([decoder_inputs] + decoder_state_inputs,
                          [decoder_outputs] + decoder_states)

    return model, encoder_model, decoder_model
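A minimal greedy-decoding sketch using the encoder/decoder pair returned above. The start/end token indices and the maximum answer length are hypothetical; the surrounding project defines its own token bookkeeping:

import numpy as np

def greedy_decode(encoder_model, decoder_model, context_seq, question_seq,
                  num_decoder_tokens, start_idx, end_idx, max_len=30):
    # Encode the (context, question) pair into the initial decoder LSTM state.
    states = encoder_model.predict([context_seq, question_seq])
    # The decoder consumes one-hot vectors, starting with the start token.
    target = np.zeros((1, 1, num_decoder_tokens))
    target[0, 0, start_idx] = 1.0
    decoded = []
    for _ in range(max_len):
        probs, h, c = decoder_model.predict([target] + states)
        token = int(np.argmax(probs[0, -1, :]))
        if token == end_idx:
            break
        decoded.append(token)
        # Feed the prediction back in and carry the LSTM state forward.
        target = np.zeros((1, 1, num_decoder_tokens))
        target[0, 0, token] = 1.0
        states = [h, c]
    return decoded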
Example #27

    print('(3) split data set...')
    p1 = int(len(data) * (1 - VALIDATION_SPLIT - TEST_SPLIT))
    p2 = int(len(data) * (1 - TEST_SPLIT))
    x_train = data[:p1]
    y_train = labels[:p1]
    x_val = data[p1:p2]
    y_val = labels[p1:p2]
    x_test = data[p2:]
    y_test = labels[p2:]
    print('train docs: ' + str(len(x_train)), 'val docs: ' + str(len(x_val)), 'test docs: ' + str(len(x_test)))


    print('(4) training model...')
    model = Sequential()
    model.add(Embedding(len(word_index) + 1, EMBEDDING_DIM, input_length=MAX_SEQUENCE_LENGTH))
    model.add(LSTM(200, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dropout(0.2))
    model.add(Dense(labels.shape[1], activation='softmax'))
    model.summary()
    plot_model(model, to_file=os.path.join(ckpt_path, 'lstm_model.png'), show_shapes=True)

    model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['acc'])
    print(model.metrics_names)
    model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=2, batch_size=128)
    model.save(os.path.join(ckpt_path, 'lstm.h5'))

    print('(5) testing model...')
    print(model.evaluate(x_test, y_test))
Example #28
def seq2seq_architecture(latent_size, vocabulary_size, embedding_matrix,
                         batch_size, epochs, train_article, train_summary,
                         train_target):
    # encoder
    encoder_inputs = Input(shape=(None, ), name='Encoder-Input')
    encoder_embeddings = Embedding(
        vocabulary_size + 1,
        300,
        weights=[embedding_matrix],
        trainable=False,
        mask_zero=True,
        name='Encoder-Word-Embedding')(encoder_inputs)
    encoder_embeddings = BatchNormalization(
        name='Encoder-Batch-Normalization')(encoder_embeddings)
    _, state_h, state_c = LSTM(latent_size,
                               return_state=True,
                               dropout=0.2,
                               recurrent_dropout=0.2,
                               name='Encoder-LSTM')(encoder_embeddings)
    encoder_states = [state_h, state_c]
    encoder_model = Model(inputs=encoder_inputs,
                          outputs=encoder_states,
                          name='Encoder-Model')
    encoder_outputs = encoder_model(encoder_inputs)

    # decoder
    decoder_inputs = Input(shape=(None, ), name='Decoder-Input')
    decoder_embeddings = Embedding(
        vocabulary_size + 1,
        300,
        weights=[embedding_matrix],
        trainable=False,
        mask_zero=True,
        name='Decoder-Word-Embedding')(decoder_inputs)
    decoder_embeddings = BatchNormalization(
        name='Decoder-Batch-Normalization-1')(decoder_embeddings)
    decoder_lstm = LSTM(latent_size,
                        return_state=True,
                        return_sequences=True,
                        dropout=0.2,
                        recurrent_dropout=0.2,
                        name='Decoder-LSTM')
    decoder_lstm_outputs, _, _ = decoder_lstm(decoder_embeddings,
                                              initial_state=encoder_outputs)
    decoder_batchnorm = BatchNormalization(
        name='Decoder-Batch-Normalization-2')(decoder_lstm_outputs)
    decoder_outputs = Dense(vocabulary_size + 1,
                            activation='softmax',
                            name='Final-Output-Dense')(decoder_batchnorm)

    seq2seq_model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    seq2seq_model.compile(optimizer="adam",
                          loss='sparse_categorical_crossentropy',
                          metrics=['sparse_categorical_accuracy'])
    seq2seq_model.summary()

    classes = [item for sublist in train_summary.tolist() for item in sublist]
    class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                      classes=np.unique(classes),
                                                      y=classes)

    e_stopping = EarlyStopping(monitor='val_loss',
                               patience=4,
                               verbose=1,
                               mode='min',
                               restore_best_weights=True)
    history = seq2seq_model.fit(x=[train_article, train_summary],
                                y=np.expand_dims(train_target, -1),
                                batch_size=batch_size,
                                epochs=epochs,
                                validation_split=0.1,
                                callbacks=[e_stopping],
                                class_weight=class_weights)

    f = open("data/models/lstm_results.txt", "w", encoding="utf-8")
    f.write("LSTM \n layers: 1 \n latent size: " + str(latent_size) +
            "\n vocab size: " + str(vocabulary_size) + "\n")
    f.close()

    history_dict = history.history
    plot_loss(history_dict)

    # inference
    encoder_model = seq2seq_model.get_layer('Encoder-Model')

    decoder_inputs = seq2seq_model.get_layer('Decoder-Input').input
    decoder_embeddings = seq2seq_model.get_layer('Decoder-Word-Embedding')(
        decoder_inputs)
    decoder_embeddings = seq2seq_model.get_layer(
        'Decoder-Batch-Normalization-1')(decoder_embeddings)
    inference_state_h_input = Input(shape=(latent_size, ),
                                    name='Hidden-State-Input')
    inference_state_c_input = Input(shape=(latent_size, ),
                                    name='Cell-State-Input')

    lstm_out, lstm_state_h_out, lstm_state_c_out = seq2seq_model.get_layer(
        'Decoder-LSTM')([
            decoder_embeddings, inference_state_h_input,
            inference_state_c_input
        ])
    decoder_outputs = seq2seq_model.get_layer('Decoder-Batch-Normalization-2')(
        lstm_out)
    dense_out = seq2seq_model.get_layer('Final-Output-Dense')(decoder_outputs)
    decoder_model = Model(
        [decoder_inputs, inference_state_h_input, inference_state_c_input],
        [dense_out, lstm_state_h_out, lstm_state_c_out])

    return encoder_model, decoder_model
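The inference pair above differs from the earlier question-answering example in that the decoder takes integer token ids (fed through the frozen embedding) rather than one-hot vectors. A minimal decoding sketch under that assumption, with hypothetical start/end token ids:

import numpy as np

def decode_summary(encoder_model, decoder_model, input_seq,
                   start_token, end_token, max_len=50):
    h, c = encoder_model.predict(input_seq)      # initial decoder state
    target = np.array([[start_token]])           # integer ids, one step at a time
    summary = []
    for _ in range(max_len):
        probs, h, c = decoder_model.predict([target, h, c])
        token = int(np.argmax(probs[0, -1, :]))
        if token == end_token:
            break
        summary.append(token)
        target = np.array([[token]])
    return summary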
Example #29
steps = int(len(all_caps_train) / batch_size)
activation_vector_length = vgg_activations.shape[1]
del captions
del captions_marked
del coco_inst
del coco_caps

image_activation_input = Input(shape=(activation_vector_length, ),
                               name='img_act_input')

model_map_layer = Dense(cell_state_size, activation='tanh',
                        name='fc_map')(image_activation_input)

lang_model_input = Input(shape=(None, ), name="lang_input")
lang_embed = Embedding(input_dim=num_words,
                       output_dim=embedding_size,
                       name='lang_embed')(lang_model_input)

lang_gru1 = GRU(cell_state_size, name='lang_gru1',
                return_sequences=True)(lang_embed,
                                       initial_state=model_map_layer)
lang_gru2 = GRU(cell_state_size, name='lang_gru2',
                return_sequences=True)(lang_gru1,
                                       initial_state=model_map_layer)
lang_gru3 = GRU(cell_state_size, name='lang_gru3',
                return_sequences=True)(lang_gru2,
                                       initial_state=model_map_layer)

lang_out = Dense(num_words, activation='linear', name='lang_out')(lang_gru3)
language_model = Model(inputs=[image_activation_input, lang_model_input],
                       outputs=[lang_out])
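A minimal sketch of compiling the captioning model above. Because lang_out uses a linear activation, a from-logits loss fits; the optimizer and loss choice here are assumptions, not the original project's exact configuration:

import tensorflow as tf

language_model.compile(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=1e-3),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))
# language_model.fit([vgg_activations, caption_tokens_in], caption_tokens_out,
#                    steps_per_epoch=steps, epochs=10)  # caption tensors built elsewhere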
Example #30
def MTL_with_Title(
    feature_dim_dict,
    embedding_size=8,
    hidden_size=(256, 256),
    cin_layer_size=(
        256,
        256,
    ),
    cin_split_half=True,
    task_net_size=(128, ),
    l2_reg_linear=0.00001,
    l2_reg_embedding=0.00001,
    seed=1024,
):
    check_feature_config_dict(feature_dim_dict)
    if len(task_net_size) < 1:
        raise ValueError('task_net_size must be at least one layer')

    # xDeepFM Model

    deep_emb_list, linear_logit, inputs_list = preprocess_input_embedding(
        feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear,
        0.0001, seed)

    fm_input = concat_fun(deep_emb_list, axis=1)

    if len(cin_layer_size) > 0:
        # exFM_logit is only defined when cin_layer_size is non-empty; it is
        # added into the task logits below, so an empty CIN config would fail.
        exFM_out = CIN(cin_layer_size, 'relu', cin_split_half, seed)(fm_input)
        exFM_logit = tf.keras.layers.Dense(1, activation=None)(exFM_out)

    deep_input = tf.keras.layers.Flatten()(fm_input)
    deep_out = MLP(hidden_size)(deep_input)

    finish_out = MLP(task_net_size)(deep_out)
    finish_logit = tf.keras.layers.Dense(1, use_bias=False,
                                         activation=None)(finish_out)

    like_out = MLP(task_net_size)(deep_out)
    like_logit = tf.keras.layers.Dense(1, use_bias=False,
                                       activation=None)(like_out)

    # Add Title Features

    title_input = Input(shape=(35, ), dtype='int32', name='title_input')
    title_embedding = Embedding(output_dim=32,
                                input_dim=134545,
                                input_length=35)(title_input)
    lstm_out = LSTM(units=32, return_sequences=True)(title_embedding)
    avg_out = GlobalAveragePooling1D()(lstm_out)
    dense1 = Dense(32, activation='relu')(avg_out)
    dense2 = Dense(1, activation='relu')(dense1)

    #

    finish_logit = tf.keras.layers.add(
        [linear_logit, finish_logit, exFM_logit, dense2])
    like_logit = tf.keras.layers.add(
        [linear_logit, like_logit, exFM_logit, dense2])

    output_finish = PredictionLayer('sigmoid', name='finish')(finish_logit)
    output_like = PredictionLayer('sigmoid', name='like')(like_logit)
    print(str(inputs_list))
    inputs_list.append(title_input)
    model = tf.keras.models.Model(inputs=inputs_list,
                                  outputs=[output_finish, output_like])
    return model
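A minimal sketch of compiling the multi-task model returned above; the optimizer and loss weights are illustrative assumptions, and feature_dim_dict comes from the project's own feature configuration:

model = MTL_with_Title(feature_dim_dict)  # feature_dim_dict built elsewhere in the project
model.compile(optimizer='adagrad',
              loss={'finish': 'binary_crossentropy',
                    'like': 'binary_crossentropy'},
              loss_weights={'finish': 0.7, 'like': 0.3})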