def sepcnn_model(blocks, filters, kernel_size, embedding_dim, dropout_rate, pool_size, input_shape, num_classes, num_features, use_pretrained_embedding=False, is_embedding_trainable=False, embedding_matrix=None): """Creates an instance of a separable CNN model. # Arguments blocks: int, number of pairs of sepCNN and pooling blocks in the model. filters: int, output dimension of the layers. kernel_size: int, length of the convolution window. embedding_dim: int, dimension of the embedding vectors. dropout_rate: float, percentage of input to drop at Dropout layers. pool_size: int, factor by which to downscale input at MaxPooling layer. input_shape: tuple, shape of input to the model. num_classes: int, number of output classes. num_features: int, number of words (embedding input dimension). use_pretrained_embedding: bool, true if pre-trained embedding is on. is_embedding_trainable: bool, true if embedding layer is trainable. embedding_matrix: dict, dictionary with embedding coefficients. # Returns A sepCNN model instance. """ op_units, op_activation = _get_last_layer_units_and_activation(num_classes) model = models.Sequential() # Add embedding layer. If pre-trained embedding is used add weights to the # embeddings layer and set trainable to input is_embedding_trainable flag. if use_pretrained_embedding: model.add( Embedding(input_dim=num_features, output_dim=embedding_dim, input_length=input_shape[0], weights=[embedding_matrix], trainable=is_embedding_trainable)) else: model.add( Embedding(input_dim=num_features, output_dim=embedding_dim, input_length=input_shape[0])) for _ in range(blocks - 1): model.add(Dropout(rate=dropout_rate)) model.add( SeparableConv1D(filters=filters, kernel_size=kernel_size, activation='relu', bias_initializer='random_uniform', depthwise_initializer='random_uniform', padding='same')) model.add( SeparableConv1D(filters=filters, kernel_size=kernel_size, activation='relu', bias_initializer='random_uniform', depthwise_initializer='random_uniform', padding='same')) model.add(MaxPooling1D(pool_size=pool_size)) model.add( SeparableConv1D(filters=filters * 2, kernel_size=kernel_size, activation='relu', bias_initializer='random_uniform', depthwise_initializer='random_uniform', padding='same')) model.add( SeparableConv1D(filters=filters * 2, kernel_size=kernel_size, activation='relu', bias_initializer='random_uniform', depthwise_initializer='random_uniform', padding='same')) model.add(GlobalAveragePooling1D()) model.add(Dropout(rate=dropout_rate)) model.add(Dense(op_units, activation=op_activation)) return model
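# Usage sketch (not part of the original source): build and compile a binary
# classifier with illustrative hyperparameters. It assumes sequences padded to
# length 500, a 20k-word vocabulary, and that _get_last_layer_units_and_activation
# returns a single sigmoid unit for two classes.
model = sepcnn_model(blocks=2,
                     filters=64,
                     kernel_size=3,
                     embedding_dim=200,
                     dropout_rate=0.2,
                     pool_size=3,
                     input_shape=(500,),
                     num_classes=2,
                     num_features=20000)
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['acc'])
model.summary()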
# Neural network modelling

# In[42]:

model = Sequential()

# In[43]:

embedding_boyut = 50

# In[44]:

model.add(
    Embedding(input_dim=max_kelime,
              output_dim=embedding_boyut,
              input_length=max_token,
              name='embedding_katman'))

# In[45]:

model.add(CuDNNGRU(units=16, return_sequences=True))
model.add(CuDNNGRU(units=8, return_sequences=True))
model.add(CuDNNGRU(units=4, return_sequences=False))
model.add(Dense(1, activation='sigmoid'))

# In[46]:

optimizer = Adam(lr=1e-3)

# In[47]:
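# A plausible continuation for the empty cell above, sketched rather than taken
# from the notebook: compile the GRU stack with the Adam optimizer defined in
# In[46] and fit it on padded token sequences. The array names x_train_pad and
# y_train are assumptions standing in for whatever the earlier preprocessing
# cells produced.
model.compile(loss='binary_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])
model.summary()
model.fit(x_train_pad, y_train,
          validation_split=0.05, epochs=3, batch_size=64)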
print (max_num_tokens) print (np.sum(total_num_tokens < max_num_tokens) / len(total_num_tokens)) seq_pad = 'pre' input_train_pad = pad_sequences(input_train_tokens, maxlen=max_num_tokens, padding=seq_pad, truncating=seq_pad) input_test_pad = pad_sequences(input_test_tokens, maxlen=max_num_tokens, padding=seq_pad, truncating=seq_pad) print (input_train_pad.shape) print (input_train_pad[1]) embedding_layer_size = 8 rnn_type_model = Sequential() rnn_type_model.add(Embedding(input_dim=num_top_words, output_dim=embedding_layer_size, input_length=max_num_tokens, name='embedding_layer')) rnn_type_model.add(GRU(units=16, return_sequences=True)) rnn_type_model.add(GRU(units=4)) rnn_type_model.add(Dense(1, activation='sigmoid')) model_optimizer = Adam(lr=1e-3) rnn_type_model.compile(loss='binary_crossentropy', optimizer=model_optimizer, metrics=['accuracy']) rnn_type_model.summary() rnn_type_model.fit(input_train_pad, target_train, validation_split=0.05, epochs=3, batch_size=64) model_result = rnn_type_model.evaluate(input_test_pad, target_test)
assert len(X_train['left']) == len(Y_train) # -- # Model variables gpus = 1 batch_size = 1024 * gpus n_epoch = 50 n_hidden = 50 # Define the shared model x = Sequential() x.add( Embedding(len(embeddings), embedding_dim, weights=[embeddings], input_shape=(max_seq_length, ), trainable=False)) # CNN # x.add(Conv1D(250, kernel_size=5, activation='relu')) # x.add(GlobalMaxPool1D()) # x.add(Dense(250, activation='relu')) # x.add(Dropout(0.3)) # x.add(Dense(1, activation='sigmoid')) # LSTM x.add(LSTM(n_hidden)) shared_model = x # The visible layer left_input = Input(shape=(max_seq_length, ), dtype='int32')
from keras.losses import categorical_crossentropy
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
import matplotlib.pyplot as plt
from keras.layers import Input, Dense, Embedding, Flatten
from keras.layers import SpatialDropout1D
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential
import keras

model2 = Sequential()
# Input / Embedding
model2.add(Embedding(max_features, 150, input_length=max_words))
# CNN
model2.add(SpatialDropout1D(0.2))
model2.add(Conv1D(64, kernel_size=3, padding='same', activation='relu'))
model2.add(MaxPooling1D(pool_size=2))
model2.add(BatchNormalization())
model2.add(Conv1D(32, kernel_size=3, padding='same', activation='relu'))
model2.add(MaxPooling1D(pool_size=2))
model2.add(Flatten())
# Output layer
model2.add(Dense(5, activation='softmax'))
def NFFM( linear_feature_columns, dnn_feature_columns, embedding_size=4, dnn_hidden_units=(128, 128), l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0, dnn_dropout=0, init_std=0.0001, seed=1024, use_bn=True, reduce_sum=False, task='binary', ): """Instantiates the Operation-aware Neural Networks architecture. :param linear_feature_columns: An iterable containing all the features used by linear part of the model. :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. :param embedding_size: positive integer,sparse feature embedding_size :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector :param l2_reg_linear: float. L2 regularizer strength applied to linear part. :param l2_reg_dnn: float . L2 regularizer strength applied to DNN :param init_std: float,to use as the initialize std of embedding vector :param seed: integer ,to use as random seed. :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. :param use_bn: bool,whether use bn after ffm out or not :param reduce_sum: bool,whether apply reduce_sum on cross vector :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss :return: A Keras model instance. """ features = build_input_features(linear_feature_columns + dnn_feature_columns) inputs_list = list(features.values()) linear_logit = get_linear_logit(features, linear_feature_columns, l2_reg=l2_reg_linear, init_std=init_std, seed=seed, prefix='linear') sparse_feature_columns = list( filter(lambda x: isinstance(x, SparseFeat), dnn_feature_columns)) if dnn_feature_columns else [] varlen_sparse_feature_columns = list( filter(lambda x: isinstance(x, VarLenSparseFeat), dnn_feature_columns)) if dnn_feature_columns else [] sparse_embedding = { fc_j.embedding_name: { fc_i.embedding_name: Embedding(fc_j.dimension, embedding_size, embeddings_initializer=RandomNormal(mean=0.0, stddev=0.0001, seed=seed), embeddings_regularizer=l2(l2_reg_embedding), mask_zero=isinstance(fc_j, VarLenSparseFeat), name='sparse_emb_' + str(fc_j.embedding_name) + '_' + fc_i.embedding_name) for fc_i in sparse_feature_columns + varlen_sparse_feature_columns } for fc_j in sparse_feature_columns + varlen_sparse_feature_columns } dense_value_list = get_dense_input(features, dnn_feature_columns) embed_list = [] for fc_i, fc_j in itertools.combinations( sparse_feature_columns + varlen_sparse_feature_columns, 2): i_input = features[fc_i.name] if fc_i.use_hash: i_input = Hash(fc_i.dimension)(i_input) j_input = features[fc_j.name] if fc_j.use_hash: j_input = Hash(fc_j.dimension)(j_input) fc_i_embedding = feature_embedding(fc_i, fc_j, sparse_embedding, i_input) fc_j_embedding = feature_embedding(fc_j, fc_i, sparse_embedding, j_input) element_wise_prod = multiply([fc_i_embedding, fc_j_embedding]) if reduce_sum: element_wise_prod = Lambda(lambda element_wise_prod: K.sum( element_wise_prod, axis=-1))(element_wise_prod) embed_list.append(element_wise_prod) ffm_out = tf.keras.layers.Flatten()(concat_fun(embed_list, axis=1)) if use_bn: ffm_out = tf.keras.layers.BatchNormalization()(ffm_out) dnn_input = combined_dnn_input([ffm_out], dense_value_list) dnn_out = DNN(dnn_hidden_units, l2_reg=l2_reg_dnn, dropout_rate=dnn_dropout)(dnn_input) dnn_logit = Dense(1, use_bias=False)(dnn_out) if len(linear_feature_columns) > 0 and len(dnn_feature_columns) > 0: final_logit = 
add([dnn_logit, linear_logit]) elif len(linear_feature_columns) > 0: final_logit = linear_logit elif len(dnn_feature_columns) > 0: final_logit = dnn_logit else: raise NotImplementedError output = PredictionLayer(task)(final_logit) model = Model(inputs=inputs_list, outputs=output) return model
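# A hedged usage sketch for the model returned above. The feature-column lists
# and the train_model_input dict are assumed to be built elsewhere with the
# library's SparseFeat/DenseFeat helpers; batch size and epochs are illustrative.
model = NFFM(linear_feature_columns, dnn_feature_columns,
             embedding_size=4, dnn_hidden_units=(128, 128), task='binary')
model.compile('adam', 'binary_crossentropy', metrics=['binary_crossentropy'])
model.fit(train_model_input, train_labels,
          batch_size=256, epochs=10, validation_split=0.2, verbose=2)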
def DSIN( feature_dim_dict, sess_feature_list, embedding_size=8, sess_max_count=5, sess_len_max=10, att_embedding_size=1, att_head_num=8, dnn_hidden_units=(200, 80), dnn_activation='sigmoid', l2_reg_dnn=0, l2_reg_embedding=1e-6, task='binary', dnn_dropout=0, init_std=0.0001, seed=1024, encoding='bias', ): check_feature_config_dict(feature_dim_dict) print( 'sess_count', sess_max_count, 'encoding', encoding, ) sparse_input, dense_input, user_behavior_input_dict, _, user_sess_length = get_input( feature_dim_dict, sess_feature_list, sess_max_count, sess_len_max) sparse_embedding_dict = { feat.name: Embedding(feat.dimension, embedding_size, embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed), embeddings_regularizer=l2(l2_reg_embedding), name='sparse_emb_' + str(i) + '-' + feat.name, mask_zero=(feat.name in sess_feature_list)) for i, feat in enumerate(feature_dim_dict["sparse"]) } query_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"], sess_feature_list, sess_feature_list) query_emb = concat_fun(query_emb_list) deep_input_emb_list = get_embedding_vec_list( sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"], mask_feat_list=sess_feature_list) deep_input_emb = concat_fun(deep_input_emb_list) deep_input_emb = Flatten()(NoMask()(deep_input_emb)) be_flag = True if encoding == 'bias' else False tr_input = sess_interest_division(sparse_embedding_dict, user_behavior_input_dict, feature_dim_dict['sparse'], sess_feature_list, sess_max_count, bias_encoding=be_flag) Self_Attention = Transformer(att_embedding_size, att_head_num, dropout_rate=0, use_layer_norm=False, use_positional_encoding=(not be_flag), seed=seed, supports_masking=True, blinding=True) sess_fea = sess_interest_extractor(tr_input, sess_max_count, Self_Attention) interest_attention_layer = AttentionSequencePoolingLayer( att_hidden_units=(64, 16), weight_normalization=True, supports_masking=False)([query_emb, sess_fea, user_sess_length]) lstm_outputs = BiLSTM( len(sess_feature_list) * embedding_size, layers=2, res_layers=0, dropout_rate=0.2, )(sess_fea) lstm_attention_layer = AttentionSequencePoolingLayer( att_hidden_units=(64, 16), weight_normalization=True)([query_emb, lstm_outputs, user_sess_length]) deep_input_emb = Concatenate()([ deep_input_emb, Flatten()(interest_attention_layer), Flatten()(lstm_attention_layer) ]) if len(dense_input) > 0: deep_input_emb = Concatenate()([deep_input_emb] + list(dense_input.values())) output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed)(deep_input_emb) output = Dense(1, use_bias=False, activation=None)(output) output = PredictionLayer(task)(output) sess_input_list = [] #sess_input_length_list = [] for i in range(sess_max_count): sess_name = "sess_" + str(i) sess_input_list.extend( get_inputs_list([user_behavior_input_dict[sess_name]])) #sess_input_length_list.append(user_behavior_length_dict[sess_name]) model_input_list = get_inputs_list( [sparse_input, dense_input]) + sess_input_list + [user_sess_length] model = Model(inputs=model_input_list, outputs=output) return model
# Inspect the data structure: tfidf[i][j] is the tf-idf weight of term j in text i
weight = tfidf.toarray()
print(weight)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(weight, Y)
print(X_train.shape, X_test.shape)
print(len(y_train), len(y_test))
#(15, 117) (6, 117) 15 6

#-------------------------------- Model building and training -------------------------------
model = Sequential()

# Build the Embedding layer; 128 is the dimensionality of the embedding vectors
model.add(Embedding(max_features, 128))

# Build the LSTM layer
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))

# Build the fully connected layer
# Note: the LSTM layer above only returns the output of the last timestep;
# set return_sequences=True if the output at every timestep is needed
model.add(Dense(units=1, activation='sigmoid'))

# Model summary
model.summary()

# Compile the network
model.compile(
    optimizer='rmsprop',          # RMSprop optimizer
    loss='binary_crossentropy',   # binary cross-entropy loss
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Project : tql-Python.
# @File : mid_layer
# @Time : 2019-07-12 16:33
# @Author : yuanjie
# @Email : [email protected]
# @Software : PyCharm
# @Description :
"""
https://www.tensorflow.org/beta/tutorials/keras/feature_columns
"""

from tensorflow.python.keras.layers import Input, Embedding, Reshape, Activation
from tensorflow.python.keras.models import Model

input_model = Input(shape=(1, ))
output_store = Embedding(1115, 10, name='store_embedding')(input_model)
output_store = Reshape(target_shape=(10, ))(output_store)
output_model = Activation('sigmoid')(output_store)

model = Model(inputs=input_model, outputs=output_model)
model.summary()

# Use this model's predictions as the output, i.e. expose the embedding layer.
embed = Model(inputs=model.input, outputs=model.get_layer(index=1).output)
embed.predict([[1]])
train_seq_mat = sequence.pad_sequences(train_seq, maxlen=max_len)
val_seq_mat = sequence.pad_sequences(val_seq, maxlen=max_len)
test_seq_mat = sequence.pad_sequences(test_seq, maxlen=max_len)
print(train_seq_mat.shape)
print(val_seq_mat.shape)
print(test_seq_mat.shape)

# In[ ]:

# In[17]:

## Define the LSTM model
inputs = Input(name='inputs', shape=[max_len])
## Embedding(vocabulary size, embedding dimension)
layer = Embedding(max_words + 1, 128, input_length=max_len)(inputs)
layer = LSTM(128)(layer)
layer = Dense(128, activation="relu", name="FC1")(layer)
layer = Dropout(0.5)(layer)
layer = Dense(3, activation="softmax", name="FC2")(layer)
model = Model(inputs=inputs, outputs=layer)
model.summary()
model.compile(loss="categorical_crossentropy",
              optimizer=RMSprop(),
              metrics=["accuracy"])

# In[15]:

# model = tf.keras.Sequential([
#     tf.keras.layers.Embedding(max_words+1,128,input_length=max_len),
#     tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
def seq2seq_architecture(latent_size, vocabulary_size, article_max_len, embedding_matrix, batch_size, epochs, train_article, train_summary, train_target): encoder_inputs = Input(shape=(article_max_len, ), name='Encoder-Input') encoder_embeddings = Embedding( vocabulary_size + 1, 300, weights=[embedding_matrix], trainable=False, mask_zero=False, name='Encoder-Word-Embedding')(encoder_inputs) encoder_embeddings = BatchNormalization( name='Encoder-Batch-Normalization')(encoder_embeddings) encoder_conv = Conv1D(filters=4, kernel_size=8, padding='same', activation='relu')(encoder_embeddings) encoder_drop = Dropout(0.25)(encoder_conv) encoder_pool = MaxPooling1D(pool_size=1)(encoder_drop) encoder_flatten = Flatten()(encoder_pool) encoder_model = Model(inputs=encoder_inputs, outputs=encoder_flatten, name='Encoder-Model') encoder_outputs = encoder_model(encoder_inputs) decoder_inputs = Input(shape=(None, ), name='Decoder-Input') decoder_embeddings = Embedding( vocabulary_size + 1, 300, weights=[embedding_matrix], trainable=False, mask_zero=False, name='Decoder-Word-Embedding')(decoder_inputs) decoder_embeddings = BatchNormalization( name='Decoder-Batch-Normalization-1')(decoder_embeddings) decoder_conv = Conv1D(filters=32, kernel_size=4, padding='same', activation='relu', name='Decoder-Conv1D') \ (decoder_embeddings) decoder_drop = Dropout(0.25, name='Decoder-Conv1D-Dropout')(decoder_conv) decoder_pool = MaxPooling1D(pool_size=1, name='Decoder-MaxPool1D')( decoder_drop) # GlobalMaxPool1D() decoder_gru = GRU(latent_size, return_state=True, return_sequences=True, name='Decoder-GRU') decoder_gru_outputs, _ = decoder_gru(decoder_pool, initial_state=encoder_outputs) decoder_outputs = BatchNormalization( name='Decoder-Batch-Normalization-2')(decoder_gru_outputs) decoder_outputs = Dense(vocabulary_size + 1, activation='softmax', name='Final-Output-Dense')(decoder_outputs) seq2seq_model = Model([encoder_inputs, decoder_inputs], decoder_outputs) seq2seq_model.compile(optimizer="adam", loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy']) seq2seq_model.summary() classes = [item for sublist in train_summary.tolist() for item in sublist] class_weights = class_weight.compute_class_weight('balanced', np.unique(classes), classes) e_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1, mode='min', restore_best_weights=True) history = seq2seq_model.fit(x=[train_article, train_summary], y=np.expand_dims(train_target, -1), batch_size=batch_size, epochs=epochs, validation_split=0.1, callbacks=[e_stopping], class_weight=class_weights) f = open("data/models/convgru_results.txt", "w", encoding="utf-8") f.write("ConvGRU \n layers: 1 \n latent size: " + str(latent_size) + "\n vocab size: " + str(vocabulary_size) + "\n") f.close() history_dict = history.history plot_loss(history_dict) # inference encoder_model = seq2seq_model.get_layer('Encoder-Model') decoder_inputs = seq2seq_model.get_layer('Decoder-Input').input decoder_embeddings = seq2seq_model.get_layer('Decoder-Word-Embedding')( decoder_inputs) decoder_embeddings = seq2seq_model.get_layer( 'Decoder-Batch-Normalization-1')(decoder_embeddings) decoder_conv = seq2seq_model.get_layer('Decoder-Conv1D')( decoder_embeddings) decoder_drop = seq2seq_model.get_layer('Decoder-Conv1D-Dropout')( decoder_conv) decoder_pool = seq2seq_model.get_layer('Decoder-MaxPool1D')(decoder_drop) gru_inference_state_input = Input(shape=(latent_size, ), name='Hidden-State-Input') gru_out, gru_state_out = seq2seq_model.get_layer('Decoder-GRU')( 
[decoder_pool, gru_inference_state_input]) decoder_outputs = seq2seq_model.get_layer('Decoder-Batch-Normalization-2')( gru_out) dense_out = seq2seq_model.get_layer('Final-Output-Dense')(decoder_outputs) decoder_model = Model([decoder_inputs, gru_inference_state_input], [dense_out, gru_state_out]) return encoder_model, decoder_model
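# A hedged inference sketch (not from the source) showing how the returned
# (encoder_model, decoder_model) pair might be used for greedy decoding.
# start_token_id, end_token_id and the padded `article` vector are assumptions
# that must match the tokenizer used to build train_article / train_summary.
def greedy_decode(article, encoder_model, decoder_model,
                  start_token_id, end_token_id, max_summary_len=20):
    # Encode the article once; the flattened encoder output seeds the GRU state.
    state = encoder_model.predict(article.reshape(1, -1))
    target_seq = np.array([[start_token_id]])
    decoded_ids = []
    for _ in range(max_summary_len):
        # decoder_model returns [token probabilities, updated GRU state].
        token_probs, state = decoder_model.predict([target_seq, state])
        next_id = int(np.argmax(token_probs[0, -1, :]))
        if next_id == end_token_id:
            break
        decoded_ids.append(next_id)
        target_seq = np.array([[next_id]])
    return decoded_ids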
for v in vals.keys(): vals[v] = vals[v] / len(y) print(vals) kfold = StratifiedKFold(n_splits=5, shuffle=True) cvscores_avg = [] cvscores_against = [] cvscores_favor = [] tri = [] for train, test in kfold.split(x_train_pad, y): x_train = x_train_pad[train] y_train = y_train_onehot[train] x_test = x_train_pad[test] y_test = y_train_onehot[test] model = Sequential() model.add(Embedding(input_dim=num_words, output_dim=embedding_size, input_length=max_tokens, name='layer_embedding')) # model.add(Flatten()) # model.add(Dense(25, activation='elu')) # model.add(Dropout(0.5)) model.add(GRU(units=16)) model.add(Dense(3, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=1e-3), metrics=['acc']) model.fit(x_train, y_train, epochs=20, batch_size=128, verbose=0) y_pred = model.predict(x_test).argmax(axis=-1) cm = classification_report(y_test.argmax(axis=-1), y_pred) dl = f1_score(y_true=y_test.argmax(axis=-1), y_pred=y_pred, average=None)
activation='tanh', dropout=0.2, recurrent_dropout=0.2) encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1) #encoder lstm 3 encoder_lstm3 = LSTM(latent_dim, return_state=True, return_sequences=True, activation='tanh', dropout=0.2, recurrent_dropout=0.2) encoder_outputs, state_h, state_c = encoder_lstm3(encoder_output2) dec_input = Input(shape=(None, )) dec_emb_layer = Embedding(output_size + 1, latent_dim, trainable=True) dec_emb = dec_emb_layer(dec_input) decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True, activation='tanh', dropout=0.2, recurrent_dropout=0.2) decoder_outputs, decoder_fwd_state, decoder_back_state = decoder_lstm( dec_emb, initial_state=[state_h, state_c]) # Attention layer attn_layer = AttentionLayer(name='attention_layer') attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])
def sepCNN(blocks, filters, kernel_size, embedding_dim, dropout_rate, pool_size, input_shape, num_features, pretrained_embedding=False, embedding_trainable=False, embedding_weights=None, learning_rate=1e-3): """ Creates an instance of a separable CNN model. Parameters ---------- blocks: int Number of pairs of sepCNN and pooling blocks in the model. One block contains [DropOut, Conv1D, Conv1D, MaxPool] filters: int Output dimension of the layers. kernel_size: int Length of the convolution window. embedding_dim: int Dimension of the embedding vectors. dropout_rate: float Percentage of input to drop at Dropout layers. pool_size: int Factor by which to downscale input at MaxPooling layer. input_shape: tuple Shape of input to the model. num_features: int Number of words (embedding input dimension). pretrained_embedding: bool True if pre-trained embedding is on. embedding_trainable: bool True if embedding layer is trainable. embedding_weights: np.ndarray Dictionary with embedding coefficients. learning_rate: float Learning rate parameter for the model Returns ------- model: A compiled sepCNN keras model instance. """ model = Sequential() if pretrained_embedding: model.add( Embedding(num_features, embedding_dim, input_length=input_shape[0], embeddings_initializer=Constant(embedding_weights), trainable=embedding_trainable)) else: model.add( Embedding(num_features, embedding_dim, input_length=input_shape[0])) for _ in range(blocks - 1): model.add(Dropout(dropout_rate)) model.add( SeparableConv1D(filters, kernel_size, activation='relu', padding='same')) model.add( SeparableConv1D(filters, kernel_size, activation='relu', padding='same')) model.add(MaxPooling1D(pool_size)) model.add( SeparableConv1D(filters * 2, kernel_size, activation='relu', padding='same')) model.add( SeparableConv1D(filters * 2, kernel_size, activation='relu', padding='same')) model.add(GlobalAveragePooling1D()) model.add(Dropout(dropout_rate)) model.add(Dense(1, activation='sigmoid')) optimizer = Adam(lr=learning_rate) model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['acc']) return model
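# Illustrative only (not from the source): train the compiled model on padded
# token sequences. x_train / y_train, the vocabulary size and all hyperparameter
# values below are assumptions.
model = sepCNN(blocks=2, filters=64, kernel_size=3, embedding_dim=200,
               dropout_rate=0.2, pool_size=3,
               input_shape=x_train.shape[1:], num_features=20000)
model.fit(x_train, y_train,
          epochs=10, batch_size=128, validation_split=0.1)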
def train(self, texts: List[str], target: List[int]) -> None: from tensorflow.python.keras.models import Model #type: ignore from tensorflow.python.keras.layers import Input, Embedding, GRU, Dense, Bidirectional, GlobalMaxPool1D, concatenate #type: ignore from tensorflow.keras.optimizers import Adam #type: ignore from tensorflow.keras.callbacks import History #type: ignore if self.downsampling: texts, target = downsample(texts, target, self.downsampling_ratio) if self.verbose: print('1. Vectorizing texts') NUMBER_OF_FEATURES: int = 20000 self.tokenizer = text.Tokenizer(num_words=NUMBER_OF_FEATURES) self.tokenizer.fit_on_texts(texts) vocabulary: Dict[str, int] = self.tokenizer.word_index if self._max_sequence_length == 0: self._max_sequence_length = len(max(texts, key=len)) vectorized_texts: array = self.vectorize_texts(texts) if self.include_casing_information: casing_information: array = self.texts_to_casing_information(texts) if self.embedding_location == '': if self.verbose: print('2. Skip (no embeddings)') print('3. Skip (no embeddings)') else: if self.verbose: print('2. Loading word embeddings') embedding_dictionary: Dict[ str, List[float]] = load_embedding_dictionary( self.embedding_location) nr_of_embedding_features: int = len( list(embedding_dictionary.values()) [1]) # Check how many values we have for the first word if self.verbose: print('3. Creating embedding matrix') embedding_matrix: array = create_embedding_matrix_for_vocabulary( embedding_dictionary, vocabulary) if self.verbose: print('4. Building up model') #Define a simple BiGru model with a pretrained embedding layer word_input: Input = Input(shape=(self._max_sequence_length, )) if self.embedding_location == '': #Add an empty embedding layer if we have no pretrained embeddings EMPTY_EMBEDDING_LAYER_SIZE: int = 300 layers = Embedding( len(vocabulary) + 1, EMPTY_EMBEDDING_LAYER_SIZE)(word_input) else: layers = Embedding(input_dim=len(vocabulary) + 1, output_dim=nr_of_embedding_features, input_length=vectorized_texts.shape[1], weights=[embedding_matrix], trainable=False)(word_input) #Add a separate 'entrance' for the casing information if self.include_casing_information: word_model: Model = Model(inputs=word_input, outputs=layers) casing_input: Input = Input(shape=(self._max_sequence_length, 1)) casing_model: Model = Model(inputs=casing_input, outputs=casing_input) layers = concatenate([word_model.output, casing_model.output]) if self.bidirectional: layers = Bidirectional( GRU(16, activation='tanh', return_sequences=True))(layers) layers = Bidirectional( GRU(16, activation='tanh', return_sequences=True))(layers) else: layers = GRU(16, activation='tanh', return_sequences=True)(layers) layers = GRU(16, activation='tanh', return_sequences=True)(layers) layers = GlobalMaxPool1D()(layers) layers = Dense(256)(layers) layers = Dense(256)(layers) layers = Dense(1, activation='sigmoid')(layers) if self.include_casing_information: model: Model = Model([word_model.input, casing_model.input], layers) else: model: Model = Model(word_input, layers) #Compile the model optimizer: Adam = Adam(lr=self.learning_rate) model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['acc']) if self.verbose: print('5. training the model') if self.include_casing_information: input = [vectorized_texts, casing_information] else: input = vectorized_texts history: History = model.fit( input, target, epochs=self.learning_epochs, #validation_data=(test_vectors, test_target), verbose=1, # Logs once per epoch. 
batch_size=self.learning_batch_size) self.model = model
import numpy
from tensorflow.python.keras.datasets import imdb
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.layers import LSTM
from tensorflow.python.keras.layers import Conv1D
from tensorflow.python.keras.layers import MaxPooling1D
from tensorflow.python.keras.layers import Embedding
from tensorflow.python.keras.preprocessing import sequence

numpy.random.seed(7)

top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)

max_review_length = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)

embedding_vector_length = 32
model = Sequential()
model.add(Embedding(top_words, embedding_vector_length, input_length=max_review_length))
model.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(X_train, y_train, epochs=10, batch_size=64)

# Final evaluation of the model
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1] * 100))
def train(self, texts: List[str], target: List[int]) -> None: from tensorflow.python.keras.models import Sequential #type: ignore from tensorflow.python.keras.layers import Embedding, Dense, LSTM, GlobalMaxPool1D #type: ignore from tensorflow.keras.optimizers import Adam #type: ignore from tensorflow.keras.callbacks import History #type: ignore if self.downsampling: texts, target = downsample(texts, target, self.downsampling_ratio) if self.verbose: print('1. Vectorizing texts') NUMBER_OF_FEATURES: int = 20000 self.tokenizer = text.Tokenizer(num_words=NUMBER_OF_FEATURES) self.tokenizer.fit_on_texts(texts) vocabulary: Dict[str, int] = self.tokenizer.word_index if self._max_sequence_length == 0: self._max_sequence_length = len(max(texts, key=len)) vectorized_texts: array = self.vectorize_texts(texts) if self.embedding_location == '': if self.verbose: print('2. Skip (no embeddings)') print('3. Skip (no embeddings)') else: if self.verbose: print('2. Loading word embeddings') embedding_dictionary: Dict[ str, List[float]] = load_embedding_dictionary( self.embedding_location) nr_of_embedding_features: int = len( list(embedding_dictionary.values()) [1]) # Check how many values we have for the first word if self.verbose: print('3. Creating embedding matrix') embedding_matrix: array = create_embedding_matrix_for_vocabulary( embedding_dictionary, vocabulary) if self.verbose: print('4. Building up model') #Define a simple LSTM model with a pretrained embedding layer model: Sequential = Sequential() if self.embedding_location == '': #Add an empty embedding layer if we have no pretrained embeddings EMPTY_EMBEDDING_LAYER_SIZE: int = 300 model.add( Embedding(len(vocabulary) + 1, EMPTY_EMBEDDING_LAYER_SIZE)) else: model.add( Embedding(input_dim=len(vocabulary) + 1, output_dim=nr_of_embedding_features, input_length=vectorized_texts.shape[1], weights=[embedding_matrix], trainable=False)) model.add(LSTM(16, return_sequences=True)) model.add(LSTM(16, return_sequences=True)) model.add(LSTM(16, return_sequences=True)) model.add(GlobalMaxPool1D()) model.add(Dense(256)) model.add(Dense(256)) model.add(Dense(1, activation='sigmoid')) #Compile the model optimizer: Adam = Adam(lr=self.learning_rate) model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['acc']) if self.verbose: print('5. training the model') history: History = model.fit( vectorized_texts, target, epochs=self.learning_epochs, #validation_data=(test_vectors, test_target), verbose=1, # Logs once per epoch. batch_size=self.learning_batch_size) self.model = model
def seq2seq_architecture(latent_size, vocabulary_size, max_len_article, embedding_matrix, batch_size, epochs, train_article, train_summary, train_target): # encoder encoder_inputs = Input(shape=(None, ), name='Encoder-Input') encoder_embeddings = Embedding(vocabulary_size, 300, weights=[embedding_matrix], trainable=False, mask_zero=True, name='Encoder-Word-Embedding') norm_encoder_embeddings = BatchNormalization( name='Encoder-Batch-Normalization') encoder_lstm_1 = LSTM(latent_size, name='Encoder-LSTM-1', return_sequences=True, dropout=0.2, recurrent_dropout=0.2) e = encoder_embeddings(encoder_inputs) e = norm_encoder_embeddings(e) encoder_outputs = encoder_lstm_1(e) encoder_last = encoder_outputs[:, -1, :] # decoder decoder_inputs = Input(shape=(None, ), name='Decoder-Input') decoder_embeddings = Embedding(vocabulary_size, 300, weights=[embedding_matrix], trainable=False, mask_zero=True, name='Decoder-Word-Embedding') norm_decoder_embeddings = BatchNormalization( name='Decoder-Batch-Normalization-1') decoder_lstm_1 = LSTM(latent_size, name='Decoder-LSTM-1', return_sequences=True, dropout=0.2, recurrent_dropout=0.2) norm_decoder = BatchNormalization(name='Decoder-Batch-Normalization-2') attention_activation = Activation('softmax', name='Attention') dense_intermediate = TimeDistributed( Dense(64, activation="tanh", name="Intermediate-Output-Dense")) dense_final = TimeDistributed( Dense(vocabulary_size, activation="softmax", name="Final-Output-Dense")) d = decoder_embeddings(decoder_inputs) d = norm_decoder_embeddings(d) decoder_outputs = decoder_lstm_1( d, initial_state=[encoder_last, encoder_last]) decoder_outputs = norm_decoder(decoder_outputs) attention = dot([decoder_outputs, encoder_outputs], axes=[2, 2]) attention = attention_activation(attention) context = dot([attention, encoder_outputs], axes=[2, 1]) decoder_combined_context = concatenate([context, decoder_outputs]) outputs = dense_intermediate(decoder_combined_context) decoder_last = dense_final(outputs) seq2seq_model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_last) seq2seq_model.compile(optimizer="rmsprop", loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy']) seq2seq_model.summary() classes = [item for sublist in train_summary.tolist() for item in sublist] class_weights = class_weight.compute_class_weight('balanced', np.unique(classes), classes) e_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1, mode='min', restore_best_weights=True) history = seq2seq_model.fit(x=[train_article, train_summary], y=np.expand_dims(train_target, -1), batch_size=batch_size, epochs=epochs, validation_split=0.1, class_weight=class_weights) f = open("data/models/results.txt", "w", encoding="utf-8") f.write("Attention LSTM \n layers: 1 \n latent size: " + str(latent_size) + "\n vocab size: " + str(vocabulary_size) + "\n") f.close() history_dict = history.history plot_loss(history_dict) return seq2seq_model
w2v_model = gensim.models.KeyedVectors.load_word2vec_format(VECTOR_DIR, binary=True)
embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))
not_in_model = 0
in_model = 0
for word, i in word_index.items():
    if word in w2v_model:
        in_model += 1
        embedding_matrix[i] = np.asarray(w2v_model[word], dtype='float32')
    else:
        not_in_model += 1
print(str(not_in_model) + ' words not in w2v model')
embedding_layer = Embedding(len(word_index) + 1,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=False)

print('(5) training model...')
model = Sequential()
model.add(embedding_layer)
model.add(Dropout(0.2))
model.add(Conv1D(250, 3, padding='valid', activation='relu', strides=1))
model.add(MaxPooling1D(3))
model.add(Flatten())
model.add(Dense(EMBEDDING_DIM, activation='relu'))
model.add(Dense(labels.shape[1], activation='softmax'))
model.summary()
plot_model(model,
           to_file=os.path.join(ckpt_path, 'word_vector_cnn_model.png'),
batch = next(generator)  # Test the data generator by creating one batch of data.

num_captions_train = [len(captions) for captions in captions_train]
# Number of captions for each image in the training set.
total_num_captions_train = np.sum(num_captions_train)
# Total number of captions in the training set.
steps_per_epoch = int(total_num_captions_train / batch_size)
# Total number of batches per epoch.

state_size = 512      # The decoder consists of 3 GRUs, each with an internal state size of 512.
embedding_size = 128  # The embedding layer has size 128.

transfer_values_input = Input(shape=(transfer_values_size,),
                              name='transfer_values_input')
# Input for feeding the transfer values into the decoder.

decoder_transfer_map = Dense(state_size,
                             activation='tanh',
                             name='decoder_transfer_map')
# Fully connected layer that maps the 4096-element vector down to 512 elements,
# with a tanh activation to keep the output between -1 and 1.

decoder_input = Input(shape=(None, ), name='decoder_input')
# Input for the token sequences fed to the decoder.

decoder_embedding = Embedding(input_dim=num_words,
                              output_dim=embedding_size,
                              name='decoder_embedding')
# Converts sequences of integer tokens into sequences of vectors.

# Create the decoder's 3 GRU layers.
decoder_gru1 = GRU(state_size, name='decoder_gru1', return_sequences=True)
decoder_gru2 = GRU(state_size, name='decoder_gru2', return_sequences=True)
decoder_gru3 = GRU(state_size, name='decoder_gru3', return_sequences=True)

decoder_dense = Dense(num_words, activation='linear', name='decoder_output')
# The GRU layers output a tensor of shape [batch_size, sequence_length, state_size],
# where each "word" is encoded as a vector of length state_size (512). This needs to
# be converted back into sequences of integer tokens that can be interpreted as
# words from the vocabulary.

# Connect all the decoder layers to the transfer-values input.
def connect_decoder(transfer_values):
    # Map the transfer values so their dimensionality matches the internal state of
    # the GRU layers; the mapped transfer values can then be used as the initial
    # state of the GRU layers.
decoder_input_data = tokens_snippets[:, :-1] print("decoder input shape: ", decoder_input_data.shape) decoder_output_data = tokens_snippets[:, 1:] print("decoder output shape: ", decoder_output_data.shape) print(decoder_input_data[5]) print(decoder_output_data[5]) print(tokenizer_snippets.tokens_to_string(decoder_input_data[5])) print(tokenizer_snippets.tokens_to_string(decoder_output_data[5])) encoder_input = Input(shape=(None, ), name='encoder_input') embedding_size = 128 encoder_embedding = Embedding(input_dim=num_words, output_dim=embedding_size, name='encoder_embedding') state_size = 512 encoder_gru1 = GRU(state_size, name='encoder_gru1', return_sequences=True) encoder_gru2 = GRU(state_size, name='encoder_gru2', return_sequences=True) encoder_gru3 = GRU(state_size, name='encoder_gru3', return_sequences=False) def connect_encoder(): # Start the neural network with its input-layer. net = encoder_input # Connect the embedding-layer. net = encoder_embedding(net)
def DSIN( dnn_feature_columns, sess_feature_list, sess_max_count=5, bias_encoding=False, att_embedding_size=1, att_head_num=8, dnn_hidden_units=(200, 80), dnn_activation='sigmoid', dnn_dropout=0, dnn_use_bn=False, l2_reg_dnn=0, l2_reg_embedding=1e-6, seed=1024, task='binary', ): """Instantiates the Deep Session Interest Network architecture. :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. :param sess_feature_list: list,to indicate sequence sparse field :param sess_max_count: positive int, to indicate the max number of sessions :param sess_len_max: positive int, to indicate the max length of each session :param bias_encoding: bool. Whether use bias encoding or postional encoding :param att_embedding_size: positive int, the embedding size of each attention head :param att_head_num: positive int, the number of attention head :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net :param dnn_activation: Activation function to use in deep net :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net :param l2_reg_dnn: float. L2 regularizer strength applied to DNN :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector :param seed: integer ,to use as random seed. :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss :return: A Keras model instance. """ hist_emb_size = sum( map( lambda fc: fc.embedding_dim, filter(lambda fc: fc.name in sess_feature_list, dnn_feature_columns))) if (att_embedding_size * att_head_num != hist_emb_size): raise ValueError( "hist_emb_size must equal to att_embedding_size * att_head_num ,got %d != %d *%d" % (hist_emb_size, att_embedding_size, att_head_num)) features = build_input_features(dnn_feature_columns) sparse_feature_columns = list( filter(lambda x: isinstance(x, SparseFeat), dnn_feature_columns)) if dnn_feature_columns else [] dense_feature_columns = list( filter(lambda x: isinstance(x, DenseFeat), dnn_feature_columns)) if dnn_feature_columns else [] varlen_sparse_feature_columns = list( filter(lambda x: isinstance(x, VarLenSparseFeat), dnn_feature_columns)) if dnn_feature_columns else [] sparse_varlen_feature_columns = [] history_fc_names = list(map(lambda x: "sess" + x, sess_feature_list)) for fc in varlen_sparse_feature_columns: feature_name = fc.name if feature_name in history_fc_names: continue else: sparse_varlen_feature_columns.append(fc) inputs_list = list(features.values()) user_behavior_input_dict = {} for idx in range(sess_max_count): sess_input = OrderedDict() for i, feat in enumerate(sess_feature_list): sess_input[feat] = features["sess_" + str(idx) + "_" + feat] user_behavior_input_dict["sess_" + str(idx)] = sess_input user_sess_length = Input(shape=(1, ), name='sess_length') embedding_dict = { feat.embedding_name: Embedding(feat.vocabulary_size, feat.embedding_dim, embeddings_initializer=feat.embeddings_initializer, embeddings_regularizer=l2(l2_reg_embedding), name='sparse_emb_' + str(i) + '-' + feat.name, mask_zero=(feat.name in sess_feature_list)) for i, feat in enumerate(sparse_feature_columns) } query_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns, sess_feature_list, sess_feature_list, to_list=True) dnn_input_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns, 
mask_feat_list=sess_feature_list, to_list=True) dense_value_list = get_dense_input(features, dense_feature_columns) query_emb = concat_func(query_emb_list, mask=True) dnn_input_emb = Flatten()(concat_func(dnn_input_emb_list)) tr_input = sess_interest_division(embedding_dict, user_behavior_input_dict, sparse_feature_columns, sess_feature_list, sess_max_count, bias_encoding=bias_encoding) Self_Attention = Transformer(att_embedding_size, att_head_num, dropout_rate=0, use_layer_norm=False, use_positional_encoding=(not bias_encoding), seed=seed, supports_masking=True, blinding=True) sess_fea = sess_interest_extractor(tr_input, sess_max_count, Self_Attention) interest_attention_layer = AttentionSequencePoolingLayer( att_hidden_units=(64, 16), weight_normalization=True, supports_masking=False)([query_emb, sess_fea, user_sess_length]) lstm_outputs = BiLSTM( hist_emb_size, layers=2, res_layers=0, dropout_rate=0.2, )(sess_fea) lstm_attention_layer = AttentionSequencePoolingLayer( att_hidden_units=(64, 16), weight_normalization=True)([query_emb, lstm_outputs, user_sess_length]) dnn_input_emb = Concatenate()([ dnn_input_emb, Flatten()(interest_attention_layer), Flatten()(lstm_attention_layer) ]) dnn_input_emb = combined_dnn_input([dnn_input_emb], dense_value_list) output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed)(dnn_input_emb) output = Dense(1, use_bias=False, activation=None)(output) output = PredictionLayer(task)(output) sess_input_list = [] # sess_input_length_list = [] for i in range(sess_max_count): sess_name = "sess_" + str(i) sess_input_list.extend( get_inputs_list([user_behavior_input_dict[sess_name]])) # sess_input_length_list.append(user_behavior_length_dict[sess_name]) model = Model(inputs=inputs_list + [user_sess_length], outputs=output) return model
def keras_estimator(model_dir,
                    config,
                    learning_rate,
                    filters=64,
                    dropout_rate=0.2,
                    embedding_dim=200,
                    kernel_size=3,
                    pool_size=3,
                    embedding_path=None,
                    word_index=None):
    # Create model instance.
    model = models.Sequential()
    num_features = min(len(word_index) + 1, TOP_K)

    # Add embedding layer. If a pre-trained embedding is used, add its weights to the
    # embedding layer and set trainable to the is_embedding_trainable flag.
    # The embedding converts each integer to a richer representation using a vector of floats.
    if embedding_path != None:
        embedding_matrix = get_embedding_matrix(word_index, embedding_path,
                                                embedding_dim)
        is_embedding_trainable = True  # set to False to freeze embedding weights

        model.add(
            Embedding(input_dim=num_features,
                      output_dim=embedding_dim,
                      input_length=MAX_SEQUENCE_LENGTH,
                      weights=[embedding_matrix],
                      trainable=is_embedding_trainable))
    else:
        model.add(
            Embedding(input_dim=num_features,
                      output_dim=embedding_dim,
                      input_length=MAX_SEQUENCE_LENGTH))

    model.add(Dropout(rate=dropout_rate))
    model.add(
        Conv1D(filters=filters,
               kernel_size=kernel_size,
               activation='relu',
               bias_initializer='random_uniform',
               padding='same'))
    model.add(MaxPooling1D(pool_size=pool_size))
    model.add(
        Conv1D(filters=filters * 2,
               kernel_size=kernel_size,
               activation='relu',
               bias_initializer='random_uniform',
               padding='same'))
    model.add(
        GlobalAveragePooling1D()
    )  # pool size equals the input size, i.e. a way of flattening the CNN output
    model.add(Dropout(rate=dropout_rate))
    model.add(Dense(len(CLASSES), activation='softmax'))

    # Compile model with learning parameters.
    optimizer = tf.keras.optimizers.Adam(lr=learning_rate)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])

    estimator = tf.keras.estimator.model_to_estimator(keras_model=model,
                                                      model_dir=model_dir,
                                                      config=config)
    return estimator
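# A hedged sketch (not from the source) of driving the returned estimator.
# run_config values, train_input_fn / eval_input_fn, word_index and all numbers
# are assumptions.
run_config = tf.estimator.RunConfig(save_checkpoints_steps=500)
estimator = keras_estimator(model_dir='output/',
                            config=run_config,
                            learning_rate=0.001,
                            word_index=word_index)
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=5000)
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn, steps=None)
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)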
def create_embedding_dict(sparse_feature_columns, varlen_sparse_feature_columns, embedding_size, init_std, seed, l2_reg, prefix='sparse_', seq_mask_zero=True): if embedding_size == 'auto': print("Notice:Do not use auto embedding in models other than DCN") sparse_embedding = { feat.embedding_name: Embedding(feat.dimension, 6 * int(pow(feat.dimension, 0.25)), embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed), embeddings_regularizer=l2(l2_reg), name=prefix + '_emb_' + feat.name) for feat in sparse_feature_columns } else: sparse_embedding = { feat.embedding_name: Embedding(feat.dimension, embedding_size, embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed), embeddings_regularizer=l2(l2_reg), name=prefix + '_emb_' + feat.name) for feat in sparse_feature_columns } if varlen_sparse_feature_columns and len( varlen_sparse_feature_columns) > 0: for feat in varlen_sparse_feature_columns: # if feat.name not in sparse_embedding: if embedding_size == "auto": sparse_embedding[feat.embedding_name] = Embedding( feat.dimension, 6 * int(pow(feat.dimension, 0.25)), embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed), embeddings_regularizer=l2(l2_reg), name=prefix + '_seq_emb_' + feat.name, mask_zero=seq_mask_zero) else: sparse_embedding[feat.embedding_name] = Embedding( feat.dimension, embedding_size, embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, seed=seed), embeddings_regularizer=l2(l2_reg), name=prefix + '_seq_emb_' + feat.name, mask_zero=seq_mask_zero) return sparse_embedding
return x, y train_x, train_y = preprocss(train_data) test_x, test_y = preprocss(eval_data) EPOCHS = 10 EMBED_DIM = 64 BiRNN_UNITS = 200 vacab_size = vocabulary_lookuper.size() tag_size = tag_lookuper.size() model = Sequential() model.add(Embedding(vacab_size, EMBED_DIM, mask_zero=True)) model.add(Bidirectional(LSTM(BiRNN_UNITS // 2, return_sequences=True))) model.add(CRF(tag_size)) # print model summary model.summary() callbacks_list = [] # tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=config['summary_log_dir']) # callbacks_list.append(tensorboard_callback) # # checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( # os.path.join(config['model_dir'], 'cp-{epoch:04d}.ckpt'), # load_weights_on_restart=True, # verbose=1
def create_model(num_encorder_paragraph_tokens, max_encoder_paragraph_seq_length, num_encoder_question_tokens, max_encoder_question_seq_length, num_decoder_tokens): hidden_units = 128 # 256, 128, 64 embed_hidden_units = 100 context_inputs = Input(shape=(None, ), name='context_inputs') encoded_context = Embedding(input_dim=num_encorder_paragraph_tokens, output_dim=embed_hidden_units, input_length=max_encoder_paragraph_seq_length, name='context_embedding')(context_inputs) encoded_context = Dropout(0.3)(encoded_context) question_inputs = Input(shape=(None, ), name='question_inputs') encoded_question = Embedding(input_dim=num_encoder_question_tokens, output_dim=embed_hidden_units, input_length=max_encoder_question_seq_length, name='question_embedding')(question_inputs) encoded_question = Dropout(0.3)(encoded_question) encoded_question = LSTM(units=embed_hidden_units, name='question_lstm')(encoded_question) encoded_question = RepeatVector(max_encoder_paragraph_seq_length)( encoded_question) merged = add([encoded_context, encoded_question]) encoder_lstm = LSTM(units=hidden_units, return_state=True, name='encoder_lstm') encoder_outputs, encoder_state_h, encoder_state_c = encoder_lstm(merged) encoder_states = [encoder_state_h, encoder_state_c] decoder_inputs = Input(shape=(None, num_decoder_tokens), name='decoder_inputs') decoder_lstm = LSTM(units=hidden_units, return_state=True, return_sequences=True, name='decoder_lstm') decoder_outputs, decoder_state_h, decoder_state_c = decoder_lstm( decoder_inputs, initial_state=encoder_states) decoder_dense = Dense(units=num_decoder_tokens, activation='softmax', name='decoder_dense') decoder_outputs = decoder_dense(decoder_outputs) model = Model([context_inputs, question_inputs, decoder_inputs], decoder_outputs) model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy']) encoder_model = Model([context_inputs, question_inputs], encoder_states) decoder_state_inputs = [ Input(shape=(hidden_units, )), Input(shape=(hidden_units, )) ] decoder_outputs, state_h, state_c = decoder_lstm( decoder_inputs, initial_state=decoder_state_inputs) decoder_states = [state_h, state_c] decoder_outputs = decoder_dense(decoder_outputs) decoder_model = Model([decoder_inputs] + decoder_state_inputs, [decoder_outputs] + decoder_states) return model, encoder_model, decoder_model
print('(3) split data set...') p1 = int(len(data) * (1 - VALIDATION_SPLIT - TEST_SPLIT)) p2 = int(len(data) * (1 - TEST_SPLIT)) x_train = data[:p1] y_train = labels[:p1] x_val = data[p1:p2] y_val = labels[p1:p2] x_test = data[p2:] y_test = labels[p2:] print('train docs: ' + str(len(x_train)), 'val docs: ' + str(len(x_val)), 'test docs: ' + str(len(x_test))) print('(4) training model...') model = Sequential() model.add(Embedding(len(word_index) + 1, EMBEDDING_DIM, input_length=MAX_SEQUENCE_LENGTH)) model.add(LSTM(200, dropout=0.2, recurrent_dropout=0.2)) model.add(Dropout(0.2)) model.add(Dense(labels.shape[1], activation='softmax')) model.summary() plot_model(model, to_file=os.path.join(ckpt_path, 'lstm_model.png'), show_shapes=True) model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['acc']) print(model.metrics_names) model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=2, batch_size=128) model.save(os.path.join(ckpt_path, 'lstm.h5')) print('(5) testing model...') print(model.evaluate(x_test, y_test))
def seq2seq_architecture(latent_size, vocabulary_size, embedding_matrix, batch_size, epochs, train_article, train_summary, train_target): # encoder encoder_inputs = Input(shape=(None, ), name='Encoder-Input') encoder_embeddings = Embedding( vocabulary_size + 1, 300, weights=[embedding_matrix], trainable=False, mask_zero=True, name='Encoder-Word-Embedding')(encoder_inputs) encoder_embeddings = BatchNormalization( name='Encoder-Batch-Normalization')(encoder_embeddings) _, state_h, state_c = LSTM(latent_size, return_state=True, dropout=0.2, recurrent_dropout=0.2, name='Encoder-LSTM')(encoder_embeddings) encoder_states = [state_h, state_c] encoder_model = Model(inputs=encoder_inputs, outputs=encoder_states, name='Encoder-Model') encoder_outputs = encoder_model(encoder_inputs) # decoder decoder_inputs = Input(shape=(None, ), name='Decoder-Input') decoder_embeddings = Embedding( vocabulary_size + 1, 300, weights=[embedding_matrix], trainable=False, mask_zero=True, name='Decoder-Word-Embedding')(decoder_inputs) decoder_embeddings = BatchNormalization( name='Decoder-Batch-Normalization-1')(decoder_embeddings) decoder_lstm = LSTM(latent_size, return_state=True, return_sequences=True, dropout=0.2, recurrent_dropout=0.2, name='Decoder-LSTM') decoder_lstm_outputs, _, _ = decoder_lstm(decoder_embeddings, initial_state=encoder_outputs) decoder_batchnorm = BatchNormalization( name='Decoder-Batch-Normalization-2')(decoder_lstm_outputs) decoder_outputs = Dense(vocabulary_size + 1, activation='softmax', name='Final-Output-Dense')(decoder_batchnorm) seq2seq_model = Model([encoder_inputs, decoder_inputs], decoder_outputs) seq2seq_model.compile(optimizer="adam", loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy']) seq2seq_model.summary() classes = [item for sublist in train_summary.tolist() for item in sublist] class_weights = class_weight.compute_class_weight('balanced', np.unique(classes), classes) e_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1, mode='min', restore_best_weights=True) history = seq2seq_model.fit(x=[train_article, train_summary], y=np.expand_dims(train_target, -1), batch_size=batch_size, epochs=epochs, validation_split=0.1, callbacks=[e_stopping], class_weight=class_weights) f = open("data/models/lstm_results.txt", "w", encoding="utf-8") f.write("LSTM \n layers: 1 \n latent size: " + str(latent_size) + "\n vocab size: " + str(vocabulary_size) + "\n") f.close() history_dict = history.history plot_loss(history_dict) # inference encoder_model = seq2seq_model.get_layer('Encoder-Model') decoder_inputs = seq2seq_model.get_layer('Decoder-Input').input decoder_embeddings = seq2seq_model.get_layer('Decoder-Word-Embedding')( decoder_inputs) decoder_embeddings = seq2seq_model.get_layer( 'Decoder-Batch-Normalization-1')(decoder_embeddings) inference_state_h_input = Input(shape=(latent_size, ), name='Hidden-State-Input') inference_state_c_input = Input(shape=(latent_size, ), name='Cell-State-Input') lstm_out, lstm_state_h_out, lstm_state_c_out = seq2seq_model.get_layer( 'Decoder-LSTM')([ decoder_embeddings, inference_state_h_input, inference_state_c_input ]) decoder_outputs = seq2seq_model.get_layer('Decoder-Batch-Normalization-2')( lstm_out) dense_out = seq2seq_model.get_layer('Final-Output-Dense')(decoder_outputs) decoder_model = Model( [decoder_inputs, inference_state_h_input, inference_state_c_input], [dense_out, lstm_state_h_out, lstm_state_c_out]) return encoder_model, decoder_model
steps = int(len(all_caps_train) / batch_size) activation_vector_length = vgg_activations.shape[1] del captions del captions_marked del coco_inst del coco_caps image_activation_input = Input(shape=(activation_vector_length, ), name='img_act_input') model_map_layer = Dense(cell_state_size, activation='tanh', name='fc_map')(image_activation_input) lang_model_input = Input(shape=(None, ), name="lang_input") lang_embed = Embedding(input_dim=num_words, output_dim=embedding_size, name='lang_embed')(lang_model_input) lang_gru1 = GRU(cell_state_size, name='lang_gru1', return_sequences=True)(lang_embed, initial_state=model_map_layer) lang_gru2 = GRU(cell_state_size, name='lang_gru2', return_sequences=True)(lang_gru1, initial_state=model_map_layer) lang_gru3 = GRU(cell_state_size, name='lang_gru3', return_sequences=True)(lang_gru2, initial_state=model_map_layer) lang_out = Dense(num_words, activation='linear', name='lang_out')(lang_gru3) language_model = Model(inputs=[image_activation_input, lang_model_input], outputs=[lang_out])
def MTL_with_Title( feature_dim_dict, embedding_size=8, hidden_size=(256, 256), cin_layer_size=( 256, 256, ), cin_split_half=True, task_net_size=(128, ), l2_reg_linear=0.00001, l2_reg_embedding=0.00001, seed=1024, ): check_feature_config_dict(feature_dim_dict) if len(task_net_size) < 1: raise ValueError('task_net_size must be at least one layer') # xDeepFM Model deep_emb_list, linear_logit, inputs_list = preprocess_input_embedding( feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear, 0.0001, seed) fm_input = concat_fun(deep_emb_list, axis=1) if len(cin_layer_size) > 0: exFM_out = CIN(cin_layer_size, 'relu', cin_split_half, seed)(fm_input) exFM_logit = tf.keras.layers.Dense( 1, activation=None, )(exFM_out) deep_input = tf.keras.layers.Flatten()(fm_input) deep_out = MLP(hidden_size)(deep_input) finish_out = MLP(task_net_size)(deep_out) finish_logit = tf.keras.layers.Dense(1, use_bias=False, activation=None)(finish_out) like_out = MLP(task_net_size)(deep_out) like_logit = tf.keras.layers.Dense(1, use_bias=False, activation=None)(like_out) # Add Title Features title_input = Input(shape=(35, ), dtype='int32', name='title_input') title_embedding = Embedding(output_dim=32, input_dim=134545, input_length=35)(title_input) lstm_out = LSTM(units=32, return_sequences=True)(title_embedding) avg_out = GlobalAveragePooling1D()(lstm_out) dense1 = Dense(32, activation='relu')(avg_out) dense2 = Dense(1, activation='relu')(dense1) # finish_logit = tf.keras.layers.add( [linear_logit, finish_logit, exFM_logit, dense2]) like_logit = tf.keras.layers.add( [linear_logit, like_logit, exFM_logit, dense2]) output_finish = PredictionLayer('sigmoid', name='finish')(finish_logit) output_like = PredictionLayer('sigmoid', name='like')(like_logit) print(str(inputs_list)) inputs_list.append(title_input) model = tf.keras.models.Model(inputs=inputs_list, outputs=[output_finish, output_like]) return model
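# A hedged sketch (not from the source) of compiling and fitting the two-head
# model above; feature_dim_dict, the input array list and the label arrays are
# assumptions, and the loss keys reuse the 'finish'/'like' output layer names.
model = MTL_with_Title(feature_dim_dict)
model.compile(optimizer='adagrad',
              loss={'finish': 'binary_crossentropy',
                    'like': 'binary_crossentropy'},
              loss_weights={'finish': 1.0, 'like': 1.0})
model.fit(train_input_arrays + [train_title_seq],
          {'finish': finish_labels, 'like': like_labels},
          batch_size=4096, epochs=1, validation_split=0.1)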