def get_model(data: SupervisedData, embed_hidden_size):
    sentence = Input(shape=data.input_shape[SENTENCE], dtype='int32')
    # noinspection PyUnresolvedReferences
    encoded_sentence = Embedding(data.vocab_size, embed_hidden_size)(sentence)
    encoded_sentence = Dropout(0.3)(encoded_sentence)

    question = Input(shape=data.input_shape[QUERY], dtype='int32')
    # noinspection PyUnresolvedReferences
    encoded_question = Embedding(data.vocab_size, embed_hidden_size)(question)
    encoded_question = Dropout(0.3)(encoded_question)
    encoded_question = RNN(embed_hidden_size)(encoded_question)
    # noinspection PyUnresolvedReferences
    encoded_question = RepeatVector(data.sentence_maxlen)(encoded_question)

    merged = add([encoded_sentence, encoded_question])
    merged = RNN(embed_hidden_size)(merged)
    merged = Dropout(0.3)(merged)
    # noinspection PyUnresolvedReferences
    preds = Dense(data.vocab_size, activation='softmax')(merged)

    model = Model([sentence, question], preds)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    return model
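A minimal usage sketch for the story/question model above, assuming `data` exposes the `input_shape`, `vocab_size`, and `sentence_maxlen` attributes referenced in `get_model`. The array names (`x_stories`, `x_questions`, `y_answers`) and the hyperparameter values are placeholders, not part of the original code.

# Hypothetical driver for get_model; array names and hyperparameters are assumptions.
model = get_model(data, embed_hidden_size=50)
model.fit([x_stories, x_questions], y_answers,
          batch_size=32, epochs=40, validation_split=0.05)
loss, acc = model.evaluate([x_stories_test, x_questions_test], y_answers_test)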
def build_basic_model():
    # Encoder
    encoder_inputs = Input(shape=(max_encoder_seq_length, ), name='encoder_inputs')
    encoder_embedding = embedding_layer(encoder_inputs)
    rnn_encoded, *encoder_states = LSTM(latent_dim,
                                        return_sequences=True,
                                        return_state=True,
                                        name='encoder_lstm')(encoder_embedding)

    # Decoder: an attention cell that attends over the encoder outputs (passed as constants)
    decoder_inputs = Input(shape=(None, ), name='decoder_inputs')
    decoder_embedding = target_emedding_layer(decoder_inputs)
    attention_cell = AttentionRNNCell(units=latent_dim,
                                      encoder_ts=max_encoder_seq_length,
                                      encoder_latdim=latent_dim)
    decoder_layer = RNN(attention_cell, return_sequences=True, return_state=True)
    decoder_outputs, decoder_states = decoder_layer(decoder_embedding,
                                                    initial_state=encoder_states[0],
                                                    constants=rnn_encoded)
    decoder_dense = Dense(num_decoder_tokens, activation='softmax', name='decoder_dense')
    decoder_outputs = decoder_dense(decoder_outputs)

    basic_model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=[decoder_outputs])
    basic_model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
    return basic_model
def lmu_layer(return_sequences=False, **kwargs):
    return RNN(LMUCell(units=6,
                       order=6,
                       theta=6,
                       input_encoders_initializer=Constant(1),
                       hidden_encoders_initializer=Constant(0),
                       memory_encoders_initializer=Constant(0),
                       input_kernel_initializer=Constant(0),
                       hidden_kernel_initializer=Constant(0),
                       memory_kernel_initializer='glorot_normal'),
               return_sequences=return_sequences,
               **kwargs)
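A minimal sketch of how the wrapped LMU cell above might be used, assuming `LMUCell` comes from the keras-lmu package and that the input is a float sequence; `timesteps` and `n_features` are placeholder names, not part of the original code.

# Hypothetical usage of lmu_layer; timesteps and n_features are placeholders.
inputs = Input(shape=(timesteps, n_features))
x = lmu_layer(return_sequences=False)(inputs)   # only the final hidden state
outputs = Dense(1)(x)
model = Model(inputs, outputs)
model.compile(optimizer='adam', loss='mse')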
def lstm_model(self):
    inp = Input(shape=(self.maxlen, ))
    if self.embeddings is not None:
        self.vocab_size = self.embeddings.embedding_matrix.shape[0]
        x = Embedding(self.vocab_size,
                      c.MODEL.embed_size,
                      weights=[self.embeddings.embedding_matrix],
                      trainable=False)(inp)
    else:
        x = Embedding(self.vocab_size, c.MODEL.embed_size)(inp)

    for i in range(c.MODEL.n_layers):
        if c.MODEL.seq_type == 'lstm':
            x = LSTM(c.MODEL.embed_size, return_sequences=True,
                     dropout=0.1, recurrent_dropout=0.1)(x)
        elif c.MODEL.seq_type == 'gru':
            x = GRU(c.MODEL.embed_size, return_sequences=True,
                    dropout=0.1, recurrent_dropout=0.1)(x)
        else:
            x = RNN(c.MODEL.embed_size, return_sequences=True,
                    dropout=0.1, recurrent_dropout=0.1)(x)

    if c.MODEL.attention:
        x = AttentionWeightedAverage()(x)
    else:
        x = GlobalMaxPool1D()(x)

    x = Dense(c.MODEL.embed_size, activation="relu")(x)
    x = Dropout(0.1)(x)
    x = Dense(c.MODEL.nclasses, activation="sigmoid")(x)

    # if self.ngpus > 1:
    #     with tf.device("/cpu:0"):
    #         model = Model(inputs=inp, outputs=x)
    # else:
    #     model = Model(inputs=inp, outputs=x)
    model = Model(inputs=inp, outputs=x)
    adam = optimizers.Adam(lr=c.TRAINING.l_rate)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
    return model
def create_model(args, initial_mean_value, overal_maxlen, vocab):
    ###############################################################################
    ## Recurrence unit type
    #
    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################
    ## Create Model
    #
    if args.dropout_w > 0:
        dropout_W = args.dropout_w
    else:
        dropout_W = args.dropout_prob       # default=0.5
    if args.dropout_u > 0:
        dropout_U = args.dropout_u
    else:
        dropout_U = args.dropout_prob       # default=0.1

    cnn_border_mode = 'same'

    if args.model_type == 'reg':
        if initial_mean_value.ndim == 0:
            initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
        num_outputs = len(initial_mean_value)
    else:
        num_outputs = initial_mean_value

    ###############################################################################
    ## Initialize embeddings if requested
    #
    if args.emb_path:
        def my_init(shape, name=None):
            from nea.w2vEmbReader import W2VEmbReader as EmbReader
            logger.info('Initializing lookup table')
            emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
            emb_matrix = np.random.random(shape)
            # logger.info(' initial matrix \n %s ' % (emb_matrix,))
            emb_matrix = emb_reader.get_emb_matrix_given_vocab(vocab, emb_matrix)
            # from keras.backend import set_value, get_value
            # set_value(model.layers[model.emb_index].W, get_value(emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].W)))
            # model.layers[model.emb_index].W.set_value(emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].W.get_value()))
            # logger.info(' pre-trained matrix \n %s ' % (emb_matrix,))
            return K.variable(emb_matrix, name=name)
        logger.info(' Use pre-trained embedding')
    else:
        my_init = 'uniform'
        logger.info(' Use default initializing embedding')

    ###############################################################################
    ## Model Stacking
    #
    if args.model_type == 'cls':
        logger.info('Building a CLASSIFICATION model with POOLING')
        dense_activation = 'tanh'
        dense_init = 'glorot_normal'
        final_init = 'glorot_uniform'
        if args.loss == 'cnp':
            final_activation = 'softmax'
        elif args.loss == 'hng':
            final_activation = 'linear'
    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model with POOLING')
        if args.normalize:
            final_activation = 'sigmoid'
            final_init = 'he_normal'
            dense_activation = 'tanh'
            dense_init = 'he_normal'
        else:
            final_activation = 'relu'
            final_init = 'he_uniform'
            dense_activation = 'tanh'
            dense_init = 'he_uniform'
    else:
        raise NotImplementedError

    sequence = Input(shape=(overal_maxlen, ), dtype='int32')
    x = Embedding(len(vocab), args.emb_dim, mask_zero=True,
                  init=my_init, trainable=args.embd_train)(sequence)

    # Conv Layer
    if args.cnn_dim > 0:
        x = Conv1DWithMasking(nb_filter=args.cnn_dim,
                              filter_length=args.cnn_window_size,
                              border_mode=cnn_border_mode,
                              subsample_length=1)(x)

    # RNN Layer
    if args.rnn_dim > 0:
        forwards = RNN(args.rnn_dim, return_sequences=True,
                       dropout_W=dropout_W, dropout_U=dropout_U)(x)
        if args.bi:
            backwards = RNN(args.rnn_dim, return_sequences=True,
                            dropout_W=dropout_W, dropout_U=dropout_U,
                            go_backwards=True)(x)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            if args.bi:
                backwards = Dropout(args.dropout_prob)(backwards)

        # Stack 2 Layers
        if args.rnn_2l or args.rnn_3l:
            if args.bi:
                merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
            else:
                merged = forwards
            forwards = RNN(args.rnn_dim, return_sequences=True,
                           dropout_W=dropout_W, dropout_U=dropout_U)(merged)
            if args.bi:
                backwards = RNN(args.rnn_dim, return_sequences=True,
                                dropout_W=dropout_W, dropout_U=dropout_U,
                                go_backwards=True)(merged)
            if args.dropout_prob > 0:
                forwards = Dropout(args.dropout_prob)(forwards)
                if args.bi:
                    backwards = Dropout(args.dropout_prob)(backwards)

        # Stack 3 Layers
        if args.rnn_3l:
            if args.bi:
                merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
            else:
                merged = forwards
            forwards = RNN(args.rnn_dim, return_sequences=True,
                           dropout_W=dropout_W, dropout_U=dropout_U)(merged)
            if args.bi:
                backwards = RNN(args.rnn_dim, return_sequences=True,
                                dropout_W=dropout_W, dropout_U=dropout_U,
                                go_backwards=True)(merged)
            if args.dropout_prob > 0:
                forwards = Dropout(args.dropout_prob)(forwards)
                if args.bi:
                    backwards = Dropout(args.dropout_prob)(backwards)

        if args.aggregation == 'mot':
            forwards = MeanOverTime(mask_zero=True)(forwards)
            if args.bi:
                backwards = MeanOverTime(mask_zero=True)(backwards)
                merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
            else:
                merged = forwards
        else:
            raise NotImplementedError

        # Augmented TF/IDF Layer
        if args.tfidf > 0:
            pca_input = Input(shape=(args.tfidf, ), dtype='float32')
            tfidfmerged = merge([merged, pca_input], mode='concat')
        else:
            tfidfmerged = merged

        # Optional Dense Layer
        if args.dense > 0:
            if args.loss == 'hng':
                tfidfmerged = Dense(num_outputs, init=dense_init,
                                    W_regularizer=l2(0.001),
                                    activity_regularizer=activity_l2(0.001))(tfidfmerged)
            else:
                tfidfmerged = Dense(num_outputs, init=dense_init)(tfidfmerged)
            if final_activation == 'relu' or final_activation == 'linear':
                tfidfmerged = BatchNormalization()(tfidfmerged)
            tfidfmerged = Activation(dense_activation)(tfidfmerged)
            if args.dropout_prob > 0:
                tfidfmerged = Dropout(args.dropout_prob)(tfidfmerged)

        # Final Prediction Layer
        if args.loss == 'hng':
            tfidfmerged = Dense(num_outputs, init=final_init,
                                W_regularizer=l2(0.001),
                                activity_regularizer=activity_l2(0.001))(tfidfmerged)
        else:
            tfidfmerged = Dense(num_outputs, init=final_init)(tfidfmerged)
        if final_activation == 'relu' or final_activation == 'linear':
            tfidfmerged = BatchNormalization()(tfidfmerged)
        predictions = Activation(final_activation)(tfidfmerged)

    else:  # if no rnn
        if args.dropout_prob > 0:
            x = Dropout(args.dropout_prob)(x)

        # Mean over Time
        if args.aggregation == 'mot':
            x = MeanOverTime(mask_zero=True)(x)
        else:
            raise NotImplementedError

        # Augmented TF/IDF Layer
        if args.tfidf > 0:
            pca_input = Input(shape=(args.tfidf, ), dtype='float32')
            z = merge([x, pca_input], mode='concat')
        else:
            z = x

        # Optional Dense Layer
        if args.dense > 0:
            if args.loss == 'hng':
                z = Dense(args.dense, init=dense_init,
                          W_regularizer=l2(0.001),
                          activity_regularizer=activity_l2(0.001))(z)
            else:
                z = Dense(args.dense, init=dense_init)(z)
            if final_activation == 'relu' or final_activation == 'linear':
                z = BatchNormalization()(z)
            z = Activation(dense_activation)(z)
            if args.dropout_prob > 0:
                z = Dropout(args.dropout_prob)(z)

        # Final Prediction Layer
        if args.loss == 'hng':
            z = Dense(num_outputs, init=final_init,
                      W_regularizer=l2(0.001),
                      activity_regularizer=activity_l2(0.001))(z)
        else:
            z = Dense(num_outputs, init=final_init)(z)
        if final_activation == 'relu' or final_activation == 'linear':
            z = BatchNormalization()(z)
        predictions = Activation(final_activation)(z)

    # Model Input/Output
    if args.tfidf > 0:
        model = Model(input=[sequence, pca_input], output=predictions)
    else:
        model = Model(input=sequence, output=predictions)

    # if args.model_type == 'cls':
    #     logger.info('Building a CLASSIFICATION model')
    #     sequence = Input(shape=(overal_maxlen,), dtype='int32')
    #     x = Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init, trainable=args.embd_train)(sequence)
    #     if args.cnn_dim > 0:
    #         x = Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1)(x)
    #     if args.rnn_dim > 0:
    #         x = RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U)(x)
    #     predictions = Dense(num_outputs, activation='softmax')(x)
    #     model = Model(input=sequence, output=predictions)
    # elif args.model_type == 'clsp':
    # elif args.model_type == 'mlp':
    #     logger.info('Building a linear model with POOLING')
    #     sequence = Input(shape=(overal_maxlen,), dtype='int32')
    #     x = Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init, trainable=args.embd_train)(sequence)
    #     if args.dropout_prob > 0:
    #         x = Dropout(args.dropout_prob)(x)
    #     x = MeanOverTime(mask_zero=True)(x)
    #     if args.tfidf > 0:
    #         z = merge([x,pca_input], mode='concat')
    #     else:
    #         z = x
    #     if args.dense > 0:
    #         z = Dense(args.dense, activation='tanh')(z)
    #         if args.dropout_prob > 0:
    #             z = Dropout(args.dropout_prob)(z)
    #     predictions = Dense(num_outputs, activation='softmax')(z)
    #     if args.tfidf > 0:
    #         model = Model(input=[sequence, pca_input], output=predictions)
    #     else:
    #         model = Model(input=sequence, output=predictions)
    #
    # elif args.model_type == 'reg':
    #     logger.info('Building a REGRESSION model')
    #     model = Sequential()
    #     model.add(Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init, trainable=args.embd_train))
    #     if args.cnn_dim > 0:
    #         model.add(Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1))
    #     if args.rnn_dim > 0:
    #         model.add(RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U))
    #     if args.dropout_prob > 0:
    #         model.add(Dropout(args.dropout_prob))
    #     model.add(Dense(num_outputs))
    #     if not args.skip_init_bias:
    #         bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
    #         model.layers[-1].b.set_value(bias_value)
    #     model.add(Activation('sigmoid'))
    #
    # elif args.model_type == 'regp':
    #     logger.info('Building a REGRESSION model with POOLING')
    #     model = Sequential()
    #     model.add(Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init, trainable=args.embd_train))
    #     if args.cnn_dim > 0:
    #         model.add(Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1))
    #     if args.rnn_dim > 0:
    #         model.add(RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U))
    #     if args.dropout_prob > 0:
    #         model.add(Dropout(args.dropout_prob))
    #     if args.aggregation == 'mot':
    #         model.add(MeanOverTime(mask_zero=True))
    #     elif args.aggregation.startswith('att'):
    #         model.add(Attention(op=args.aggregation, activation='tanh', init_stdev=0.01))
    #     model.add(Dense(num_outputs))
    #     if not args.skip_init_bias:
    #         bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
    #         # model.layers[-1].b.set_value(bias_value)
    #         K.set_value(model.layers[-1].b, bias_value)
    #     model.add(Activation('sigmoid'))
    #
    # elif args.model_type == 'breg':
    #     logger.info('Building a BIDIRECTIONAL REGRESSION model')
    #     sequence = Input(shape=(overal_maxlen,), dtype='int32')
    #     output = Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init, trainable=args.embd_train)(sequence)
    #     if args.cnn_dim > 0:
    #         output = Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1)(output)
    #     if args.rnn_dim > 0:
    #         forwards = RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U)(output)
    #         backwards = RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U, go_backwards=True)(output)
    #     if args.dropout_prob > 0:
    #         forwards = Dropout(args.dropout_prob)(forwards)
    #         backwards = Dropout(args.dropout_prob)(backwards)
    #     merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
    #     densed = Dense(num_outputs)(merged)
    #     if not args.skip_init_bias:
    #         raise NotImplementedError
    #     score = Activation('sigmoid')(densed)
    #     model = Model(input=sequence, output=score)
    #
    # elif args.model_type == 'bregp':
    #     logger.info('Building a BIDIRECTIONAL REGRESSION model with POOLING')
    #     sequence = Input(shape=(overal_maxlen,), dtype='int32')
    #     output = Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init, trainable=args.embd_train)(sequence)
    #     if args.cnn_dim > 0:
    #         output = Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1)(output)
    #     if args.rnn_dim > 0:
    #         forwards = RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U)(output)
    #         backwards = RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U, go_backwards=True)(output)
    #     if args.dropout_prob > 0:
    #         forwards = Dropout(args.dropout_prob)(forwards)
    #         backwards = Dropout(args.dropout_prob)(backwards)
    #     forwards_mean = MeanOverTime(mask_zero=True)(forwards)
    #     backwards_mean = MeanOverTime(mask_zero=True)(backwards)
    #     merged = merge([forwards_mean, backwards_mean], mode='concat', concat_axis=-1)
    #     densed = Dense(num_outputs)(merged)
    #     if not args.skip_init_bias:
    #         raise NotImplementedError
    #     score = Activation('sigmoid')(densed)
    #     model = Model(input=sequence, output=score)

    logger.info('  Model Done')
    return model
def create_model(args, initial_mean_value, overal_maxlen, vocab):
    import keras
    import keras.backend as K
    from keras.layers.embeddings import Embedding
    from keras.models import Sequential, Model
    from keras.layers.core import Dense, Dropout, Activation
    from .my_layers import Attention, MeanOverTime, Conv1DWithMasking

    ###############################################################################
    ## Recurrence unit type
    #
    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################
    ## Create Model
    #
    dropout_W = 0.5     # default=0.5
    dropout_U = 0.1     # default=0.1
    cnn_border_mode = 'same'
    if initial_mean_value.ndim == 0:
        initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
    num_outputs = len(initial_mean_value)

    if args.model_type == 'cls':
        raise NotImplementedError

    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model')
        model = Sequential()
        # model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True, trainable=False))
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True, trainable=True))
        if args.cnn_dim > 0:
            model.add(Conv1DWithMasking(nb_filter=args.cnn_dim,
                                        filter_length=args.cnn_window_size,
                                        border_mode=cnn_border_mode,
                                        subsample_length=1))
        if args.rnn_dim > 0:
            model.add(RNN(args.rnn_dim, return_sequences=False,
                          dropout_W=dropout_W, dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        model.add(Dense(num_outputs))
        # if not args.skip_init_bias:
        #     bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
        #     # model.layers[-1].b.set_value(bias_value)
        #     model.layers[-1].bias_initializer = keras.initializers.Constant(value=bias_value)
        model.add(Activation('sigmoid'))
        model.emb_index = 0

    elif args.model_type == 'regp':
        logger.info('Building a REGRESSION model with POOLING')
        model = Sequential()
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            model.add(Conv1DWithMasking(nb_filter=args.cnn_dim,
                                        filter_length=args.cnn_window_size,
                                        border_mode=cnn_border_mode,
                                        subsample_length=1))
        if args.rnn_dim > 0:
            model.add(RNN(args.rnn_dim, return_sequences=True,
                          dropout_W=dropout_W, dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        if args.aggregation == 'mot':
            model.add(MeanOverTime(mask_zero=True))
        elif args.aggregation.startswith('att'):
            model.add(Attention(op=args.aggregation, activation='tanh', init_stdev=0.01))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            bias_value = (np.log(initial_mean_value) -
                          np.log(1 - initial_mean_value)).astype(K.floatx())
            # model.layers[-1].b.set_value(bias_value)
            model.layers[-1].bias_initializer = keras.initializers.Constant(value=bias_value)
        model.add(Activation('sigmoid'))
        model.emb_index = 0

    elif args.model_type == 'breg':
        logger.info('Building a BIDIRECTIONAL REGRESSION model')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        model = Sequential()    # immediately replaced by the functional Model below
        sequence = Input(shape=(overal_maxlen, ), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim, mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim, return_sequences=False,
                           dropout_W=dropout_W, dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim, return_sequences=False,
                            dropout_W=dropout_W, dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(input=sequence, output=score)
        model.emb_index = 1

    elif args.model_type == 'bregp':
        logger.info('Building a BIDIRECTIONAL REGRESSION model with POOLING')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        model = Sequential()    # immediately replaced by the functional Model below
        sequence = Input(shape=(overal_maxlen, ), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim, mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim, return_sequences=True,
                           dropout_W=dropout_W, dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim, return_sequences=True,
                            dropout_W=dropout_W, dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        forwards_mean = MeanOverTime(mask_zero=True)(forwards)
        backwards_mean = MeanOverTime(mask_zero=True)(backwards)
        merged = merge([forwards_mean, backwards_mean], mode='concat', concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(input=sequence, output=score)
        model.emb_index = 1

    logger.info('  Done')

    ###############################################################################
    ## Initialize embeddings if requested
    #
    if args.emb_path:
        from .w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        model.layers[model.emb_index].W.set_value(
            emb_reader.get_emb_matrix_given_vocab(
                vocab, model.layers[model.emb_index].W.get_value()))
        logger.info('  Done')

    return model
def create_model(args, initial_mean_value, vocab):
    from keras.layers.embeddings import Embedding
    from keras.models import Sequential
    from keras.layers.core import Dense, Activation
    from keras.layers import Bidirectional
    from nea.my_layers import Attention, MeanOverTime, Conv1DWithMasking

    ###############################################################################
    ## Recurrence unit type
    #
    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################
    ## Create Model
    #
    # dropout_W = 0.5       # default=0.5
    # dropout_U = 0.1       # default=0.1
    cnn_border_mode = 'same'

    if initial_mean_value.ndim == 0:
        print("Dim of initial_mean_value is 0")
        initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
    num_outputs = len(initial_mean_value)
    print("Dim of initial_mean_value is:", num_outputs)

    if args.model_type == 'cls':
        raise NotImplementedError

    logger.info('Building the model:%s' % args.model_type)
    model = Sequential()

    logger.info('  Adding the Embedding layer')
    model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
    model.emb_index = 0
    if args.emb_path:
        from nea.w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('  Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        # ipdb.set_trace()
        # model.layers[model.emb_index].W.set_value(emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].W.get_value()))
        model.layers[model.emb_index].set_weights([
            emb_reader.get_emb_matrix_given_vocab(
                vocab, model.layers[model.emb_index].get_weights()[0])
        ])
        # ipdb.set_trace()
        logger.info('  Done')

    # Add cnn layer
    if args.cnn_dim > 0:
        logger.info('  Adding the CNN layer')
        logger.info('  cnn_dim:%d' % args.cnn_dim)
        logger.info('  window_size:%d' % args.cnn_window_size)
        model.add(Conv1DWithMasking(nb_filter=args.cnn_dim,
                                    filter_length=args.cnn_window_size,
                                    border_mode=cnn_border_mode,
                                    subsample_length=1))
        logger.info('  Done')

    # Add LSTM RNN layer
    logger.info('  Adding the LSTM-RNN layer')
    if 'p' in args.model_type:
        layer = RNN(args.rnn_dim, return_sequences=True)  # , dropout_W=dropout_W, dropout_U=dropout_U
    else:
        layer = RNN(args.rnn_dim, return_sequences=False)
    if 'b' in args.model_type:  # BiLSTM
        logger.info('  Bidirectional layer created!')
        layer = Bidirectional(layer)
    model.add(layer)
    logger.info('  Done')

    # Add MOT or ATT layer
    if 'p' in args.model_type:
        if args.aggregation == 'mot':
            logger.info('  Adding the MOT layer')
            model.add(MeanOverTime(mask_zero=True))
        elif args.aggregation.startswith('att'):
            logger.info('  Adding the ATT layer')
            model.add(Attention(op=args.aggregation, activation='tanh',
                                name='att', init_stdev=0.01))

    model.add(Dense(num_outputs))
    logger.info('  Done')
    model.add(Activation('sigmoid'))
    logger.info('All done!')
    return model
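A minimal driver sketch for the pooling/bidirectional `create_model` variant above. The returned model is uncompiled, so the optimizer and loss here are assumptions, as are the array names `train_x`/`train_y` and the `args.batch_size`/`args.epochs` attributes.

# Hypothetical driver; optimizer, loss, array names, and extra args fields are assumptions.
model = create_model(args, train_y.mean(axis=0), vocab)
model.compile(optimizer='rmsprop', loss='mse', metrics=['mean_absolute_error'])
model.fit(train_x, train_y, batch_size=args.batch_size, epochs=args.epochs)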
def create_model(args, initial_mean_value, overal_maxlen, vocab):
    ###############################################################################
    ## Recurrence unit type
    #
    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################
    ## Create Model
    #
    if args.dropout_w > 0:
        dropout_W = args.dropout_w
    else:
        dropout_W = args.dropout_prob       # default=0.5
    if args.dropout_u > 0:
        dropout_U = args.dropout_u
    else:
        dropout_U = args.dropout_prob       # default=0.1

    cnn_border_mode = 'same'

    if args.model_type == 'reg':
        if initial_mean_value.ndim == 0:
            initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
        num_outputs = len(initial_mean_value)
    else:
        num_outputs = initial_mean_value

    ###############################################################################
    ## Initialize embeddings if requested
    #
    if args.emb_path:
        def my_init(shape, name=None):
            from nea.w2vEmbReader import W2VEmbReader as EmbReader
            logger.info('Initializing lookup table')
            emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
            emb_matrix = np.random.random(shape)
            # logger.info(' initial matrix \n %s ' % (emb_matrix,))
            emb_matrix = emb_reader.get_emb_matrix_given_vocab(vocab, emb_matrix)
            # from keras.backend import set_value, get_value
            # set_value(model.layers[model.emb_index].W, get_value(emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].W)))
            # model.layers[model.emb_index].W.set_value(emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].W.get_value()))
            # logger.info(' pre-trained matrix \n %s ' % (emb_matrix,))
            return K.variable(emb_matrix, name=name)
        logger.info(' Use pre-trained embedding')
    else:
        my_init = 'uniform'
        logger.info(' Use default initializing embedding')

    ###############################################################################
    ## Model Stacking
    #
    if args.model_type == 'cls':
        logger.info('Building a CLASSIFICATION model with POOLING')
        dense_activation = 'tanh'
        dense_init = 'glorot_normal'
        if args.loss == 'cnp':
            final_activation = 'softmax'
            final_init = 'glorot_uniform'
        elif args.loss == 'hng':
            final_activation = 'linear'
            final_init = 'glorot_uniform'
    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model with POOLING')
        dense_activation = 'tanh'
        dense_init = 'he_normal'
        if args.normalize:
            final_activation = 'sigmoid'
            final_init = 'he_normal'
        else:
            final_activation = 'relu'
            final_init = 'he_uniform'
    else:
        raise NotImplementedError

    sequence = Input(shape=(overal_maxlen, ), dtype='int32')
    x = Embedding(len(vocab), args.emb_dim, mask_zero=True,
                  init=my_init, trainable=args.embd_train)(sequence)

    # Conv Layer
    if args.cnn_dim > 0:
        x = Conv1DWithMasking(nb_filter=args.cnn_dim,
                              filter_length=args.cnn_window_size,
                              border_mode=cnn_border_mode,
                              subsample_length=1)(x)

    # RNN Layer
    if args.rnn_dim > 0:
        rnn_layer = RNN(args.rnn_dim, return_sequences=True,
                        consume_less=args.rnn_opt,
                        dropout_W=dropout_W, dropout_U=dropout_U)
        if args.bi:
            rnn_layer = Bidirectional(rnn_layer)
        x = rnn_layer(x)
        if args.dropout_prob > 0:
            x = Dropout(args.dropout_prob)(x)

        # Stack 2 Layers
        if args.rnn_2l or args.rnn_3l:
            rnn_layer2 = RNN(args.rnn_dim, return_sequences=True,
                             consume_less=args.rnn_opt,
                             dropout_W=dropout_W, dropout_U=dropout_U)
            if args.bi:
                rnn_layer2 = Bidirectional(rnn_layer2)
            x = rnn_layer2(x)
            if args.dropout_prob > 0:
                x = Dropout(args.dropout_prob)(x)

        # Stack 3 Layers
        if args.rnn_3l:
            rnn_layer3 = RNN(args.rnn_dim, return_sequences=True,
                             consume_less=args.rnn_opt,
                             dropout_W=dropout_W, dropout_U=dropout_U)
            if args.bi:
                rnn_layer3 = Bidirectional(rnn_layer3)
            x = rnn_layer3(x)
            if args.dropout_prob > 0:
                x = Dropout(args.dropout_prob)(x)

    # Mean over Time
    if args.aggregation == 'mot':
        x = MeanOverTime(mask_zero=True)(x)
    elif args.aggregation == 'att':
        attention_rnn = RNN(args.rnn_dim, return_sequences=False,
                            consume_less=args.rnn_opt,
                            dropout_W=dropout_W, dropout_U=dropout_U)
        attention_rnn = Attention(attention_rnn)
        x = attention_rnn(x)
    else:
        raise NotImplementedError

    # Augmented TF/IDF Layer
    if args.tfidf > 0:
        pca_input = Input(shape=(args.tfidf, ), dtype='float32')
        merged = merge([x, pca_input], mode='concat')
    else:
        merged = x

    # Augmented Numerical Features
    if args.features:
        ftr_input = Input(shape=(13, ), dtype='float32')
        merged = merge([merged, ftr_input], mode='concat')

    # Optional Dense Layer
    if args.dense > 0:
        if args.loss == 'hng':
            merged = DenseWithMasking(num_outputs, init=dense_init,
                                      W_regularizer=l2(0.001),
                                      activity_regularizer=l2(0.001))(merged)
        else:
            merged = DenseWithMasking(num_outputs, init=dense_init)(merged)
        if final_activation == 'relu' or final_activation == 'linear':
            merged = BatchNormalization()(merged)
        merged = Activation(dense_activation)(merged)
        if args.dropout_prob > 0:
            merged = Dropout(args.dropout_prob)(merged)

    # Final Prediction Layer
    if args.loss == 'hng':
        merged = DenseWithMasking(num_outputs, init=final_init,
                                  W_regularizer=l2(0.001),
                                  activity_regularizer=l2(0.001))(merged)
    else:
        merged = DenseWithMasking(num_outputs, init=final_init)(merged)
    if final_activation == 'relu' or final_activation == 'linear':
        merged = BatchNormalization()(merged)
    predictions = Activation(final_activation)(merged)

    # Model Input/Output
    model_input = [sequence, ]
    if args.tfidf > 0:
        model_input.append(pca_input)
    if args.features:
        model_input.append(ftr_input)
    model = Model(input=model_input, output=predictions)

    logger.info('  Model Done')
    return model
def create_model(args, vocab, num_outputs, overal_maxlen, maxlen_aspect):
    ###############################################################################
    ## Recurrence unit type
    #
    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################
    ## Create Model
    #
    dropout = args.dropout_W
    recurrent_dropout = args.dropout_U
    vocab_size = len(vocab)

    logger.info('Building a LSTM attention model to predict term/aspect sentiment')
    print('\n\n')

    ##### Inputs #####
    sentence_input = Input(shape=(overal_maxlen,), dtype='int32', name='sentence_input')
    aspect_input = Input(shape=(maxlen_aspect,), dtype='int32', name='aspect_input')
    pretrain_input = Input(shape=(None,), dtype='int32', name='pretrain_input')

    ##### construct word embedding layer #####
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb')

    ### represent aspect as averaged word embedding ###
    print('use average term embs as aspect embedding')
    aspect_term_embs = word_emb(aspect_input)
    aspect_embs = Average(mask_zero=True, name='aspect_emb')(aspect_term_embs)

    ### sentence representation ###
    sentence_output = word_emb(sentence_input)
    pretrain_output = word_emb(pretrain_input)

    print('use a rnn layer')
    rnn = RNN(args.rnn_dim, return_sequences=True, dropout=dropout,
              recurrent_dropout=recurrent_dropout, name='lstm')
    sentence_output = rnn(sentence_output)
    pretrain_output = rnn(pretrain_output)

    print('use content attention to get term weights')
    att_weights = Attention(name='att_weights')([sentence_output, aspect_embs])
    sentence_output = WeightedSum()([sentence_output, att_weights])
    pretrain_output = Average(mask_zero=True)(pretrain_output)

    if args.dropout_prob > 0:
        print('use dropout layer')
        sentence_output = Dropout(args.dropout_prob)(sentence_output)
        pretrain_output = Dropout(args.dropout_prob)(pretrain_output)

    sentence_output = Dense(num_outputs, name='dense_1')(sentence_output)
    pretrain_output = Dense(num_outputs, name='dense_2')(pretrain_output)

    aspect_probs = Activation('softmax', name='aspect_model')(sentence_output)
    doc_probs = Activation('softmax', name='pretrain_model')(pretrain_output)

    model = Model(inputs=[sentence_input, aspect_input, pretrain_input],
                  outputs=[aspect_probs, doc_probs])

    logger.info('  Done')

    ###############################################################################
    ## Initialize embeddings if requested
    #
    if args.is_pretrain:
        import pickle
        print('Set embedding, lstm, and dense weights from pre-trained models')
        if args.domain == 'lt':
            f_1 = open('../pretrained_weights/lstm_weights_lt%.1f.pkl' % (args.percetage), 'rb')
            f_2 = open('../pretrained_weights/dense_weights_lt%.1f.pkl' % (args.percetage), 'rb')
        else:
            f_1 = open('../pretrained_weights/lstm_weights_res%.1f.pkl' % (args.percetage), 'rb')
            f_2 = open('../pretrained_weights/dense_weights_res%.1f.pkl' % (args.percetage), 'rb')
        lstm_weights = pickle.load(f_1)
        dense_weights = pickle.load(f_2)
        model.get_layer('lstm').set_weights(lstm_weights)
        model.get_layer('dense_1').set_weights(dense_weights)
        model.get_layer('dense_2').set_weights(dense_weights)

    from w2vEmbReader import W2VEmbReader as EmbReader
    logger.info('Initializing lookup table')
    emb_path = '../glove/%s.txt' % (args.domain)
    emb_reader = EmbReader(args, emb_path)
    model.get_layer('word_emb').set_weights(
        emb_reader.get_emb_matrix_given_vocab(vocab, model.get_layer('word_emb').get_weights()))
    logger.info('  Done')

    return model
def create_model(args, initial_mean_value, overal_maxlen, vocab):
    import keras.backend as K
    from keras.layers.embeddings import Embedding
    from keras.models import Sequential, Model
    from keras.layers.core import Dense, Dropout, Activation
    from nea.my_layers import Attention, MeanOverTime, Conv1DWithMasking

    ###############################################################################
    ## Recurrence unit type
    #
    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################
    ## Create Model
    #
    dropout_W = 0.5     # default=0.5
    dropout_U = 0.1     # default=0.1
    cnn_border_mode = 'same'

    if initial_mean_value.ndim == 0:
        # expand the dims
        initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
    num_outputs = len(initial_mean_value)   # number of score classes to predict

    if args.model_type == 'cls':
        raise NotImplementedError

    # embedding --> cnn --> rnn(return_sequences=False) --> dropout --> dense --> sigmoid
    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model')
        model = Sequential()
        # mask_zero=True treats input index 0 as padding to be ignored; if set to True,
        # every later layer in the model must support masking, otherwise an exception is raised.
        # With mask_zero=True, index 0 is unusable in the vocabulary, so input_dim should be |vocabulary| + 1.
        # The Input layer is omitted here because input_length has a default value.
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            # border_mode == padding?  subsample_length == pooling?  where is the activation?
            model.add(Conv1DWithMasking(nb_filter=args.cnn_dim,
                                        filter_length=args.cnn_window_size,
                                        border_mode=cnn_border_mode,
                                        subsample_length=1))
        if args.rnn_dim > 0:
            # return_sequences=False: only the last state is returned
            model.add(RNN(args.rnn_dim, return_sequences=False,
                          dropout_W=dropout_W, dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            # initialize the bias of the last layer
            bias_value = (np.log(initial_mean_value) -
                          np.log(1 - initial_mean_value)).astype(K.floatx())
            model.layers[-1].b.set_value(bias_value)
        model.add(Activation('sigmoid'))    # output range is (0, 1)
        # Record the index of the embedding layer so it can later be initialized with
        # pre-trained word vectors; all layers live in model.layers.
        model.emb_index = 0

    # embedding --> cnn --> rnn(return_sequences=True) --> dropout --> MeanOverTime or Attention(mean or sum) --> dense --> sigmoid
    elif args.model_type == 'regp':
        logger.info('Building a REGRESSION model with POOLING')
        model = Sequential()
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            model.add(Conv1DWithMasking(nb_filter=args.cnn_dim,
                                        filter_length=args.cnn_window_size,
                                        border_mode=cnn_border_mode,
                                        subsample_length=1))
        if args.rnn_dim > 0:
            model.add(RNN(args.rnn_dim, return_sequences=True,
                          dropout_W=dropout_W, dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        if args.aggregation == 'mot':
            model.add(MeanOverTime(mask_zero=True))
        elif args.aggregation.startswith('att'):
            model.add(Attention(op=args.aggregation, activation='tanh', init_stdev=0.01))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            bias_value = (np.log(initial_mean_value) -
                          np.log(1 - initial_mean_value)).astype(K.floatx())
            model.layers[-1].b.set_value(bias_value)
        model.add(Activation('sigmoid'))
        model.emb_index = 0

    # embedding --> cnn --> birnn(return_sequences=False) --> dropout --> merge(concat forward & backward RNN) --> dense --> sigmoid
    elif args.model_type == 'breg':
        logger.info('Building a BIDIRECTIONAL REGRESSION model')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        model = Sequential()    # this line is probably redundant
        sequence = Input(shape=(overal_maxlen, ), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim, mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim, return_sequences=False,
                           dropout_W=dropout_W, dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim, return_sequences=False,
                            dropout_W=dropout_W, dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(input=sequence, output=score)
        model.emb_index = 1

    # embedding --> cnn --> birnn(return_sequences=True) --> dropout --> MeanOverTime --> merge(concat) --> dense --> sigmoid
    elif args.model_type == 'bregp':
        logger.info('Building a BIDIRECTIONAL REGRESSION model with POOLING')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        model = Sequential()    # redundant
        sequence = Input(shape=(overal_maxlen, ), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim, mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim, return_sequences=True,
                           dropout_W=dropout_W, dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim, return_sequences=True,
                            dropout_W=dropout_W, dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        forwards_mean = MeanOverTime(mask_zero=True)(forwards)
        backwards_mean = MeanOverTime(mask_zero=True)(backwards)
        merged = merge([forwards_mean, backwards_mean], mode='concat', concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(input=sequence, output=score)
        model.emb_index = 1

    logger.info('  Done')

    ###############################################################################
    ## Initialize embeddings if requested
    #
    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        model.layers[model.emb_index].W.set_value(
            emb_reader.get_emb_matrix_given_vocab(
                vocab, model.layers[model.emb_index].W.get_value()))
        logger.info('  Done')

    return model