def __init__(self, args, emb_index, bidirec, initial_mean_value,
             overal_maxlen=0, vocab=None):
    """Build the regression network: Embedding -> [Conv1D] -> [RNN] -> pooling -> Linear.

    Args:
        args: hyper-parameter namespace (vocab_size, emb_dim, cnn_dim,
            cnn_window_size, rnn_dim, dropout_prob, aggregation, emb_path, ...).
        emb_index: index of the embedding layer; stored for callers.
        bidirec: 1 for a bidirectional RNN (doubles the feature dim), 0 otherwise.
        initial_mean_value: mean of training targets; its length fixes the
            number of regression outputs.
        overal_maxlen: unused here; kept for interface compatibility.
        vocab: word->id mapping used to load pre-trained embeddings. New,
            optional and backward compatible; required when args.emb_path is set.
    """
    super(REGRESSION, self).__init__()
    self.dropout_W = 0.5  # default=0.5
    self.dropout_U = 0.1  # default=0.1
    self.args = args

    # A scalar mean means a single output; promote to a length-1 vector.
    if initial_mean_value.ndim == 0:
        initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
    num_outputs = len(initial_mean_value)

    # Recurrence unit type selected by flag.
    if args.recurrent_unit == 'lstm':
        from torch.nn import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from torch.nn import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from torch.nn import RNN as RNN

    self.embed = Embedding(args.vocab_size, args.emb_dim)
    outputdim = args.emb_dim
    if args.cnn_dim > 0:
        # padding (window-1)//2 keeps the sequence length ('same' conv).
        self.conv = Conv1DWithMasking(outputdim, args.cnn_dim,
                                      args.cnn_window_size, 1,
                                      (args.cnn_window_size - 1) // 2)
        outputdim = args.cnn_dim
    if args.rnn_dim > 0:
        # NOTE(review): torch ignores dropout on a single-layer RNN — confirm
        # whether stacked layers were intended.
        self.rnn = RNN(outputdim, args.rnn_dim, num_layers=1, bias=True,
                       dropout=self.dropout_W, batch_first=True,
                       bidirectional=bidirec)
        outputdim = args.rnn_dim
        if bidirec == 1:
            outputdim = args.rnn_dim * 2
    if args.dropout_prob > 0:
        self.dropout = Dropout(args.dropout_prob)
    if args.aggregation == 'mot':
        self.mot = MeanOverTime()
    elif args.aggregation.startswith('att'):
        self.att = Attention(outputdim, op=args.aggregation,
                             activation='tanh', init_stdev=0.01)
    self.linear = Linear(outputdim, num_outputs)
    # if not args.skip_init_bias:
    #     self.linear.bias.data = (torch.log(initial_mean_value) - torch.log(1 - initial_mean_value)).float()
    self.emb_index = emb_index

    if args.emb_path:
        # FIX: the original referenced the undefined names `vocab` and
        # `model`, indexed the Embedding module itself, and used Keras-style
        # get_weights(); copy the pre-trained matrix into the torch
        # embedding weights instead.
        if vocab is None:
            raise ValueError('vocab must be provided when args.emb_path is set')
        import torch
        from .w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        emb_matrix = emb_reader.get_emb_matrix_given_vocab(
            vocab, self.embed.weight.data.cpu().numpy())
        self.embed.weight.data.copy_(torch.from_numpy(np.asarray(emb_matrix)))
        logger.info('  Done')
epochs=args.epochs, verbose=1, callbacks=cbks, validation_data=(dev_x, dev_y), shuffle=True) # Load best model logger.info('Loading weights from %s', out_dir + '/best_model_weight_cb.h5') model_weight = create_model(args, train_y.mean(axis=0), vocab) model_weight.compile(optimizer=optimizer, loss=loss) model_weight.load_weights(out_dir + '/best_model_weight_cb.h5', by_name=True) model_weight.model.save(out_dir + '/best_model.h5', overwrite=True) logger.info('Loading model from %s', out_dir + '/best_model_cb.h5') model_load = load_model(out_dir + '/best_model_cb.h5', custom_objects={'MeanOverTime': MeanOverTime()}) model_load.save_weights(out_dir + '/best_model_weights.h5') np.savetxt(out_dir + '/test_x.txt', test_x, fmt='%d') np.savetxt(out_dir + '/test_y_org.txt', test_y_org, fmt='%.4f') np.savetxt(out_dir + '/test_y.txt', test_y, fmt='%.4f') logger.info('Evaluate model_load:') # score, accu = model_load.evaluate(test_x, test_y, verbose=1) test_pred = model_load.predict(test_x).squeeze() * 3 qwk = QWK(test_y_org.astype(int), np.rint(test_pred).astype(int), labels=None, weights='quadratic', sample_weight=None) np.savetxt(out_dir + '/test_pred_model_load.txt', test_pred, fmt='%.4f')
def create_model(args, initial_mean_value, overal_maxlen, vocab):
    """Build the pooled Keras model (classification or regression).

    Embedding -> [Conv1D] -> [stacked (bi)RNN + dropout] -> mean-over-time
    pooling -> [TF/IDF concat] -> [dense] -> final prediction layer.

    Args:
        args: hyper-parameter namespace.
        initial_mean_value: for 'reg', array of target means whose length is
            the number of outputs; otherwise taken directly as the output count.
        overal_maxlen: padded input sequence length.
        vocab: word->id mapping (embedding table size, pre-trained lookup).

    Returns:
        A compiled-ready keras Model.

    Raises:
        NotImplementedError: for unsupported model_type, aggregation, or a
        missing RNN layer configuration.
    """
    ## Recurrence unit type
    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ## Dropout rates: explicit per-gate flags win over the generic prob.
    if args.dropout_w > 0:
        dropout_W = args.dropout_w
    else:
        dropout_W = args.dropout_prob  # default=0.5
    if args.dropout_u > 0:
        dropout_U = args.dropout_u
    else:
        dropout_U = args.dropout_prob  # default=0.1

    cnn_border_mode = 'same'

    if args.model_type == 'reg':
        if initial_mean_value.ndim == 0:
            initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
        num_outputs = len(initial_mean_value)
    else:
        # For classification the caller passes the class count directly.
        num_outputs = initial_mean_value

    ## Initialize embeddings if requested
    if args.emb_path:
        def my_init(shape, name=None):
            # Deferred initializer: fills a random matrix with pre-trained
            # vectors for every word present in the reader's vocabulary.
            from nea.w2vEmbReader import W2VEmbReader as EmbReader
            logger.info('Initializing lookup table')
            emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
            emb_matrix = np.random.random(shape)
            emb_matrix = emb_reader.get_emb_matrix_given_vocab(vocab, emb_matrix)
            return K.variable(emb_matrix, name=name)
        logger.info(' Use pre-trained embedding')
    else:
        my_init = 'uniform'
        logger.info(' Use default initializing embedding')

    ## Model Stacking
    if args.model_type == 'cls':
        logger.info('Building a CLASSIFICATION model with POOLING')
        dense_activation = 'tanh'
        dense_init = 'glorot_normal'
        final_init = 'glorot_uniform'
        if args.loss == 'cnp':
            final_activation = 'softmax'
        elif args.loss == 'hng':
            final_activation = 'linear'
    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model with POOLING')
        if args.normalize:
            final_activation = 'sigmoid'
            final_init = 'he_normal'
            dense_activation = 'tanh'
            dense_init = 'he_normal'
        else:
            final_activation = 'relu'
            final_init = 'he_uniform'
            dense_activation = 'tanh'
            dense_init = 'he_uniform'
    else:
        raise NotImplementedError

    sequence = Input(shape=(overal_maxlen,), dtype='int32')
    x = Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init,
                  trainable=args.embd_train)(sequence)

    # Conv Layer
    if args.cnn_dim > 0:
        x = Conv1DWithMasking(nb_filter=args.cnn_dim,
                              filter_length=args.cnn_window_size,
                              border_mode=cnn_border_mode,
                              subsample_length=1)(x)

    # RNN Layer
    if args.rnn_dim > 0:
        forwards = RNN(args.rnn_dim, return_sequences=True,
                       dropout_W=dropout_W, dropout_U=dropout_U)(x)
        if args.bi:
            backwards = RNN(args.rnn_dim, return_sequences=True,
                            dropout_W=dropout_W, dropout_U=dropout_U,
                            go_backwards=True)(x)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            if args.bi:
                backwards = Dropout(args.dropout_prob)(backwards)

        # Stack 2 Layers
        if args.rnn_2l or args.rnn_3l:
            if args.bi:
                merged = merge([forwards, backwards], mode='concat',
                               concat_axis=-1)
            else:
                merged = forwards
            forwards = RNN(args.rnn_dim, return_sequences=True,
                           dropout_W=dropout_W, dropout_U=dropout_U)(merged)
            if args.bi:
                backwards = RNN(args.rnn_dim, return_sequences=True,
                                dropout_W=dropout_W, dropout_U=dropout_U,
                                go_backwards=True)(merged)
            if args.dropout_prob > 0:
                forwards = Dropout(args.dropout_prob)(forwards)
                if args.bi:
                    backwards = Dropout(args.dropout_prob)(backwards)

        # Stack 3 Layers (rnn_3l implies the 2-layer block above ran)
        if args.rnn_3l:
            if args.bi:
                merged = merge([forwards, backwards], mode='concat',
                               concat_axis=-1)
            else:
                merged = forwards
            forwards = RNN(args.rnn_dim, return_sequences=True,
                           dropout_W=dropout_W, dropout_U=dropout_U)(merged)
            if args.bi:
                backwards = RNN(args.rnn_dim, return_sequences=True,
                                dropout_W=dropout_W, dropout_U=dropout_U,
                                go_backwards=True)(merged)
            if args.dropout_prob > 0:
                forwards = Dropout(args.dropout_prob)(forwards)
                if args.bi:
                    backwards = Dropout(args.dropout_prob)(backwards)

        if args.aggregation == 'mot':
            forwards = MeanOverTime(mask_zero=True)(forwards)
            if args.bi:
                backwards = MeanOverTime(mask_zero=True)(backwards)
                merged = merge([forwards, backwards], mode='concat',
                               concat_axis=-1)
            else:
                merged = forwards
        else:
            raise NotImplementedError

        # Augmented TF/IDF Layer
        if args.tfidf > 0:
            pca_input = Input(shape=(args.tfidf,), dtype='float32')
            tfidfmerged = merge([merged, pca_input], mode='concat')
        else:
            tfidfmerged = merged

        # Optional Dense Layer
        if args.dense > 0:
            if args.loss == 'hng':
                tfidfmerged = Dense(
                    num_outputs, init=dense_init,
                    W_regularizer=l2(0.001),
                    activity_regularizer=activity_l2(0.001))(tfidfmerged)
            else:
                tfidfmerged = Dense(num_outputs, init=dense_init)(tfidfmerged)
            if final_activation == 'relu' or final_activation == 'linear':
                tfidfmerged = BatchNormalization()(tfidfmerged)
            tfidfmerged = Activation(dense_activation)(tfidfmerged)
            if args.dropout_prob > 0:
                tfidfmerged = Dropout(args.dropout_prob)(tfidfmerged)

        # Final Prediction Layer
        if args.loss == 'hng':
            tfidfmerged = Dense(
                num_outputs, init=final_init,
                W_regularizer=l2(0.001),
                activity_regularizer=activity_l2(0.001))(tfidfmerged)
        else:
            tfidfmerged = Dense(num_outputs, init=final_init)(tfidfmerged)
        if final_activation == 'relu' or final_activation == 'linear':
            tfidfmerged = BatchNormalization()(tfidfmerged)
        predictions = Activation(final_activation)(tfidfmerged)

    else:  # no rnn
        if args.dropout_prob > 0:
            x = Dropout(args.dropout_prob)(x)

        # Mean over Time
        if args.aggregation == 'mot':
            x = MeanOverTime(mask_zero=True)(x)
        else:
            raise NotImplementedError

        # Augmented TF/IDF Layer
        if args.tfidf > 0:
            pca_input = Input(shape=(args.tfidf,), dtype='float32')
            z = merge([x, pca_input], mode='concat')
        else:
            z = x

        # Optional Dense Layer
        if args.dense > 0:
            if args.loss == 'hng':
                z = Dense(args.dense, init=dense_init,
                          W_regularizer=l2(0.001),
                          activity_regularizer=activity_l2(0.001))(z)
            else:
                z = Dense(args.dense, init=dense_init)(z)
            if final_activation == 'relu' or final_activation == 'linear':
                z = BatchNormalization()(z)
            z = Activation(dense_activation)(z)
            if args.dropout_prob > 0:
                z = Dropout(args.dropout_prob)(z)

        # Final Prediction Layer
        if args.loss == 'hng':
            z = Dense(num_outputs, init=final_init,
                      W_regularizer=l2(0.001),
                      activity_regularizer=activity_l2(0.001))(z)
        else:
            # FIX: was Dense(args.dense, init=dense_init) — the final layer
            # must emit num_outputs with final_init, matching the hng branch
            # and the RNN path above.
            z = Dense(num_outputs, init=final_init)(z)
        if final_activation == 'relu' or final_activation == 'linear':
            z = BatchNormalization()(z)
        predictions = Activation(final_activation)(z)

    # Model Input/Output
    if args.tfidf > 0:
        model = Model(input=[sequence, pca_input], output=predictions)
    else:
        model = Model(input=sequence, output=predictions)

    logger.info('  Model Done')
    return model
def create_model(args, initial_mean_value, overal_maxlen, vocab):
    """Build a Sequential/functional Keras regression model.

    Supported model_type values: 'reg' (last RNN state), 'regp' (pooled),
    'breg'/'bregp' (bidirectional variants). 'cls' is not implemented.

    Args:
        args: hyper-parameter namespace.
        initial_mean_value: array of target means; its length is the number
            of outputs and, unless skip_init_bias, seeds the final bias via
            the logit transform.
        overal_maxlen: padded input sequence length (bidirectional variants).
        vocab: word->id mapping for pre-trained embedding lookup.

    Returns:
        The constructed keras model with an `emb_index` attribute marking
        the embedding layer's position.
    """
    import keras.backend as K
    from keras.layers.embeddings import Embedding
    from keras.models import Sequential, Model
    from keras.layers.core import Dense, Dropout, Activation
    from nea.my_layers import Attention, MeanOverTime, Conv1DWithMasking

    ## Recurrence unit type
    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ## Create Model
    dropout_W = 0.5  # default=0.5
    dropout_U = 0.1  # default=0.1
    cnn_border_mode = 'same'

    if initial_mean_value.ndim == 0:
        initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
    num_outputs = len(initial_mean_value)

    if args.model_type == 'cls':
        raise NotImplementedError

    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model')
        model = Sequential()
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            model.add(Conv1DWithMasking(nb_filter=args.cnn_dim,
                                        filter_length=args.cnn_window_size,
                                        border_mode=cnn_border_mode,
                                        subsample_length=1))
        if args.rnn_dim > 0:
            model.add(RNN(args.rnn_dim, return_sequences=False,
                          dropout_W=dropout_W, dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            bias_value = (np.log(initial_mean_value) -
                          np.log(1 - initial_mean_value)).astype(K.floatx())
            # FIX: assigning a numpy array to the `.bias` attribute never
            # reached the backend weight; set the backend variable instead.
            K.set_value(model.layers[-1].b, bias_value)
        model.add(Activation('sigmoid'))
        model.emb_index = 0

    elif args.model_type == 'regp':
        logger.info('Building a REGRESSION model with POOLING')
        model = Sequential()
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            model.add(Conv1DWithMasking(nb_filter=args.cnn_dim,
                                        filter_length=args.cnn_window_size,
                                        border_mode=cnn_border_mode,
                                        subsample_length=1))
        if args.rnn_dim > 0:
            model.add(RNN(args.rnn_dim, return_sequences=True,
                          dropout_W=dropout_W, dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        if args.aggregation == 'mot':
            model.add(MeanOverTime(mask_zero=True))
        elif args.aggregation.startswith('att'):
            model.add(Attention(op=args.aggregation, activation='tanh',
                                init_stdev=0.01))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            bias_value = (np.log(initial_mean_value) -
                          np.log(1 - initial_mean_value)).astype(K.floatx())
            # FIX: same attribute-assignment bug as the 'reg' branch.
            K.set_value(model.layers[-1].b, bias_value)
        model.add(Activation('sigmoid'))
        model.emb_index = 0

    elif args.model_type == 'breg':
        logger.info('Building a BIDIRECTIONAL REGRESSION model')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        # (removed dead `model = Sequential()` that was immediately replaced)
        sequence = Input(shape=(overal_maxlen,), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim,
                           mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim, return_sequences=False,
                           dropout_W=dropout_W, dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim, return_sequences=False,
                            dropout_W=dropout_W, dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(input=sequence, output=score)
        model.emb_index = 1

    elif args.model_type == 'bregp':
        logger.info('Building a BIDIRECTIONAL REGRESSION model with POOLING')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        sequence = Input(shape=(overal_maxlen,), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim,
                           mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim, return_sequences=True,
                           dropout_W=dropout_W, dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim, return_sequences=True,
                            dropout_W=dropout_W, dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        forwards_mean = MeanOverTime(mask_zero=True)(forwards)
        backwards_mean = MeanOverTime(mask_zero=True)(backwards)
        merged = merge([forwards_mean, backwards_mean], mode='concat',
                       concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(input=sequence, output=score)
        model.emb_index = 1

    logger.info('  Done')

    ## Initialize embeddings if requested
    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        # FIX: set_weights expects a list of arrays and get_weights returns
        # one; replace only the embedding matrix ([0]) instead of passing
        # the raw list/matrix through.
        model.layers[model.emb_index].set_weights([
            emb_reader.get_emb_matrix_given_vocab(
                vocab, model.layers[model.emb_index].get_weights()[0])
        ])
        logger.info('  Done')
    return model
def create_model(args, initial_mean_value, vocab):
    """Build a Sequential Keras model selected by args.model_type flags.

    'p' in model_type adds temporal pooling (MOT or attention); 'b' wraps
    the recurrent layer in Bidirectional. 'cls' is not implemented.

    Args:
        args: hyper-parameter namespace.
        initial_mean_value: array of target means; its length is the number
            of outputs.
        vocab: word->id mapping for pre-trained embedding lookup.

    Returns:
        The constructed keras model (emb_index attribute marks the
        embedding layer).
    """
    from keras.layers.embeddings import Embedding
    from keras.models import Sequential
    from keras.layers.core import Dense, Activation
    from keras.layers import Bidirectional
    from nea.my_layers import Attention, MeanOverTime, Conv1DWithMasking

    ## Recurrence unit type
    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ## Create Model
    # dropout_W = 0.5    # default=0.5
    # dropout_U = 0.1    # default=0.1
    cnn_border_mode = 'same'

    if initial_mean_value.ndim == 0:
        # FIX: replaced leftover debug print() calls with the module logger,
        # and corrected the second message (it reports the output count,
        # not a dimension).
        logger.info('initial_mean_value is a scalar; expanding to 1-d')
        initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
    num_outputs = len(initial_mean_value)
    logger.info('Number of outputs: %d', num_outputs)

    if args.model_type == 'cls':
        raise NotImplementedError

    logger.info('Building the model:%s' % args.model_type)
    model = Sequential()

    logger.info(' Adding the Embedding layer')
    model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
    model.emb_index = 0
    if args.emb_path:
        from nea.w2vEmbReader import W2VEmbReader as EmbReader
        logger.info(' Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        # Replace only the embedding matrix with pre-trained vectors.
        model.layers[model.emb_index].set_weights([
            emb_reader.get_emb_matrix_given_vocab(
                vocab, model.layers[model.emb_index].get_weights()[0])
        ])
        logger.info(' Done')

    # Add cnn layer
    if args.cnn_dim > 0:
        logger.info(' Adding the CNN layer')
        logger.info(' cnn_dim:%d' % args.cnn_dim)
        logger.info(' window_size:%d' % args.cnn_window_size)
        model.add(Conv1DWithMasking(nb_filter=args.cnn_dim,
                                    filter_length=args.cnn_window_size,
                                    border_mode=cnn_border_mode,
                                    subsample_length=1))
        logger.info(' Done')

    # Add LSTM RNN layer
    logger.info(' Adding the LSTM-RNN layer')
    if 'p' in args.model_type:
        # Pooled variants need the full sequence of hidden states.
        layer = RNN(args.rnn_dim, return_sequences=True)
        # , dropout_W=dropout_W, dropout_U=dropout_U)
    else:
        layer = RNN(args.rnn_dim, return_sequences=False)
    if 'b' in args.model_type:  # BiLSTM
        logger.info(' Bidirectional layer created!')
        layer = Bidirectional(layer)
    model.add(layer)
    logger.info(' Done')

    # Add MOT or ATT layer
    if 'p' in args.model_type:
        if args.aggregation == 'mot':
            logger.info(' Adding the MOT layer')
            model.add(MeanOverTime(mask_zero=True))
        elif args.aggregation.startswith('att'):
            logger.info(' Adding the ATT layer')
            model.add(Attention(op=args.aggregation, activation='tanh',
                                name='att', init_stdev=0.01))

    model.add(Dense(num_outputs))
    logger.info(' Done')
    model.add(Activation('sigmoid'))
    logger.info('All done!')
    return model
def create_model(args, initial_mean_value, overal_maxlen, vocab):
    """Assemble the pooled (bi)RNN Keras model for classification or regression.

    Pipeline: Embedding -> [Conv1D] -> up to three stacked (bi)RNN layers
    with dropout -> MOT or attention pooling -> optional TF/IDF and numeric
    feature inputs concatenated in -> optional dense layer -> final
    prediction layer.

    Args:
        args: hyper-parameter namespace.
        initial_mean_value: for 'reg', array of target means (length = number
            of outputs); otherwise taken directly as the output count.
        overal_maxlen: padded input sequence length.
        vocab: word->id mapping (embedding size and pre-trained lookup).

    Returns:
        A keras Model over the word-id input plus any auxiliary inputs.

    Raises:
        NotImplementedError: unknown model_type or aggregation.
    """
    # Pick the recurrent cell requested on the command line.
    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    # Explicit per-gate dropout flags take precedence over dropout_prob.
    dropout_W = args.dropout_w if args.dropout_w > 0 else args.dropout_prob
    dropout_U = args.dropout_u if args.dropout_u > 0 else args.dropout_prob
    cnn_border_mode = 'same'

    if args.model_type == 'reg':
        if initial_mean_value.ndim == 0:
            initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
        num_outputs = len(initial_mean_value)
    else:
        # Classification callers pass the class count directly.
        num_outputs = initial_mean_value

    # Embedding initializer: pre-trained table when a path was given.
    if args.emb_path:
        def my_init(shape, name=None):
            from nea.w2vEmbReader import W2VEmbReader as EmbReader
            logger.info('Initializing lookup table')
            reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
            seed_matrix = np.random.random(shape)
            filled = reader.get_emb_matrix_given_vocab(vocab, seed_matrix)
            return K.variable(filled, name=name)
        logger.info(' Use pre-trained embedding')
    else:
        my_init = 'uniform'
        logger.info(' Use default initializing embedding')

    # Activation / initializer selection per model type.
    if args.model_type == 'cls':
        logger.info('Building a CLASSIFICATION model with POOLING')
        dense_activation, dense_init = 'tanh', 'glorot_normal'
        if args.loss == 'cnp':
            final_activation, final_init = 'softmax', 'glorot_uniform'
        elif args.loss == 'hng':
            final_activation, final_init = 'linear', 'glorot_uniform'
    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model with POOLING')
        dense_activation, dense_init = 'tanh', 'he_normal'
        if args.normalize:
            final_activation, final_init = 'sigmoid', 'he_normal'
        else:
            final_activation, final_init = 'relu', 'he_uniform'
    else:
        raise NotImplementedError

    sequence = Input(shape=(overal_maxlen,), dtype='int32')
    x = Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init,
                  trainable=args.embd_train)(sequence)

    # Convolutional front end.
    if args.cnn_dim > 0:
        x = Conv1DWithMasking(nb_filter=args.cnn_dim,
                              filter_length=args.cnn_window_size,
                              border_mode=cnn_border_mode,
                              subsample_length=1)(x)

    # Stacked recurrent layers: one mandatory, plus one for rnn_2l/rnn_3l,
    # plus another for rnn_3l — identical wiring to the unrolled original.
    if args.rnn_dim > 0:
        depth = 1
        if args.rnn_2l or args.rnn_3l:
            depth += 1
        if args.rnn_3l:
            depth += 1
        for _ in range(depth):
            recurrent = RNN(args.rnn_dim, return_sequences=True,
                            consume_less=args.rnn_opt,
                            dropout_W=dropout_W, dropout_U=dropout_U)
            if args.bi:
                recurrent = Bidirectional(recurrent)
            x = recurrent(x)
            if args.dropout_prob > 0:
                x = Dropout(args.dropout_prob)(x)

    # Temporal aggregation.
    if args.aggregation == 'mot':
        x = MeanOverTime(mask_zero=True)(x)
    elif args.aggregation == 'att':
        pooling_rnn = RNN(args.rnn_dim, return_sequences=False,
                          consume_less=args.rnn_opt,
                          dropout_W=dropout_W, dropout_U=dropout_U)
        x = Attention(pooling_rnn)(x)
    else:
        raise NotImplementedError

    # Concatenate optional auxiliary inputs onto the pooled features.
    features = x
    if args.tfidf > 0:
        pca_input = Input(shape=(args.tfidf,), dtype='float32')
        features = merge([features, pca_input], mode='concat')
    if args.features:
        ftr_input = Input(shape=(13,), dtype='float32')
        features = merge([features, ftr_input], mode='concat')

    # Optional intermediate dense layer.
    if args.dense > 0:
        if args.loss == 'hng':
            features = DenseWithMasking(
                num_outputs, init=dense_init,
                W_regularizer=l2(0.001),
                activity_regularizer=l2(0.001))(features)
        else:
            features = DenseWithMasking(num_outputs, init=dense_init)(features)
        if final_activation in ('relu', 'linear'):
            features = BatchNormalization()(features)
        features = Activation(dense_activation)(features)
        if args.dropout_prob > 0:
            features = Dropout(args.dropout_prob)(features)

    # Final prediction layer.
    if args.loss == 'hng':
        features = DenseWithMasking(
            num_outputs, init=final_init,
            W_regularizer=l2(0.001),
            activity_regularizer=l2(0.001))(features)
    else:
        features = DenseWithMasking(num_outputs, init=final_init)(features)
    if final_activation in ('relu', 'linear'):
        features = BatchNormalization()(features)
    predictions = Activation(final_activation)(features)

    # Collect the model inputs in declaration order.
    model_input = [sequence]
    if args.tfidf > 0:
        model_input.append(pca_input)
    if args.features:
        model_input.append(ftr_input)
    model = Model(input=model_input, output=predictions)
    logger.info('  Model Done')
    return model
def create_model(args, initial_mean_value, overal_maxlen, vocab):
    """Build and return a Keras essay-scoring regression model.

    Four architectures are selected via ``args.model_type``:
      'reg'   embedding -> [cnn] -> rnn(last state) -> [dropout] -> dense -> sigmoid
      'regp'  embedding -> [cnn] -> rnn(all states) -> [dropout]
              -> MeanOverTime or Attention pooling -> dense -> sigmoid
      'breg'  bidirectional variant of 'reg' (two RNNs, concat of last states)
      'bregp' bidirectional variant of 'regp' (two RNNs, concat of pooled means)
    'cls' is not implemented.

    Args:
        args: parsed CLI namespace; reads vocab_size, emb_dim, cnn_dim,
            cnn_window_size, rnn_dim, dropout_prob, recurrent_unit,
            model_type, aggregation, skip_init_bias, emb_path.
        initial_mean_value: mean of the (normalized) training scores; used to
            initialize the final Dense bias via the inverse sigmoid (logit).
        overal_maxlen: padded sequence length for the functional-API Inputs.
        vocab: word -> index mapping, used to seed pre-trained embeddings.

    Returns:
        A compiled-ready Keras model with an ``emb_index`` attribute marking
        the position of the Embedding layer in ``model.layers`` (used later
        to overwrite its weights with pre-trained vectors).

    Raises:
        NotImplementedError: for model_type 'cls', or for the bidirectional
            variants when bias initialization is requested
            (``not args.skip_init_bias``).
    """
    import keras.backend as K
    from keras.layers.embeddings import Embedding
    from keras.models import Sequential, Model
    from keras.layers.core import Dense, Dropout, Activation
    from nea.my_layers import Attention, MeanOverTime, Conv1DWithMasking

    ###########################################################################
    ## Recurrence unit type
    #
    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###########################################################################
    ## Create Model
    #
    dropout_W = 0.5  # default=0.5; dropout on RNN input connections
    dropout_U = 0.1  # default=0.1; dropout on RNN recurrent connections
    cnn_border_mode = 'same'
    if initial_mean_value.ndim == 0:
        # Promote a scalar mean to shape (1,) so len() below is valid.
        initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
    num_outputs = len(initial_mean_value)  # number of predicted score outputs

    if args.model_type == 'cls':
        raise NotImplementedError

    # embedding -> cnn -> rnn(return_sequences=False) -> dropout -> dense -> sigmoid
    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model')
        model = Sequential()
        # mask_zero=True treats index 0 as padding to be skipped; every
        # downstream layer must support masking, and index 0 becomes
        # unavailable for real vocabulary entries (input_dim = |V| + 1).
        # The Input layer is omitted because input_length has a default.
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            # NOTE(review): no explicit activation on the conv layer;
            # border_mode/subsample_length are the Keras-1 names for
            # padding/stride — confirm against Conv1DWithMasking.
            model.add(
                Conv1DWithMasking(nb_filter=args.cnn_dim,
                                  filter_length=args.cnn_window_size,
                                  border_mode=cnn_border_mode,
                                  subsample_length=1))
        if args.rnn_dim > 0:
            # return_sequences=False: only the final hidden state is kept.
            model.add(
                RNN(args.rnn_dim,
                    return_sequences=False,
                    dropout_W=dropout_W,
                    dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            # Initialize the final bias to logit(mean score) so the sigmoid
            # output starts at the training-set mean.
            bias_value = (np.log(initial_mean_value) -
                          np.log(1 - initial_mean_value)).astype(K.floatx())
            model.layers[-1].b.set_value(bias_value)
        model.add(Activation('sigmoid'))  # outputs in (0, 1)
        # Record the Embedding layer's index in model.layers so pre-trained
        # vectors can be loaded into it later.
        model.emb_index = 0

    # embedding -> cnn -> rnn(return_sequences=True) -> dropout
    # -> MeanOverTime or Attention (mean or sum) -> dense -> sigmoid
    elif args.model_type == 'regp':
        logger.info('Building a REGRESSION model with POOLING')
        model = Sequential()
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            model.add(
                Conv1DWithMasking(nb_filter=args.cnn_dim,
                                  filter_length=args.cnn_window_size,
                                  border_mode=cnn_border_mode,
                                  subsample_length=1))
        if args.rnn_dim > 0:
            model.add(
                RNN(args.rnn_dim,
                    return_sequences=True,
                    dropout_W=dropout_W,
                    dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        if args.aggregation == 'mot':
            model.add(MeanOverTime(mask_zero=True))
        elif args.aggregation.startswith('att'):
            model.add(
                Attention(op=args.aggregation,
                          activation='tanh',
                          init_stdev=0.01))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            bias_value = (np.log(initial_mean_value) -
                          np.log(1 - initial_mean_value)).astype(K.floatx())
            model.layers[-1].b.set_value(bias_value)
        model.add(Activation('sigmoid'))
        model.emb_index = 0

    # embedding -> cnn -> bi-rnn(return_sequences=False) -> dropout
    # -> merge(concat fwd & bwd) -> dense -> sigmoid
    elif args.model_type == 'breg':
        logger.info('Building a BIDIRECTIONAL REGRESSION model')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        # (a stray `model = Sequential()` was removed here; the functional-API
        # Model below is what is actually returned)
        sequence = Input(shape=(overal_maxlen, ), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim,
                           mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim,
                           return_sequences=False,
                           dropout_W=dropout_W,
                           dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim,
                            return_sequences=False,
                            dropout_W=dropout_W,
                            dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(input=sequence, output=score)
        # Embedding is layer 1 in the functional model (Input is layer 0).
        model.emb_index = 1

    # embedding -> cnn -> bi-rnn(return_sequences=True) -> dropout
    # -> MeanOverTime each direction -> merge(concat) -> dense -> sigmoid
    elif args.model_type == 'bregp':
        logger.info('Building a BIDIRECTIONAL REGRESSION model with POOLING')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        # (a stray `model = Sequential()` was removed here as well)
        sequence = Input(shape=(overal_maxlen, ), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim,
                           mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim,
                           return_sequences=True,
                           dropout_W=dropout_W,
                           dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim,
                            return_sequences=True,
                            dropout_W=dropout_W,
                            dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        forwards_mean = MeanOverTime(mask_zero=True)(forwards)
        backwards_mean = MeanOverTime(mask_zero=True)(backwards)
        merged = merge([forwards_mean, backwards_mean],
                       mode='concat',
                       concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(input=sequence, output=score)
        model.emb_index = 1

    logger.info('  Done')

    ###########################################################################
    ## Initialize embeddings if requested
    #
    if args.emb_path:
        # NOTE(review): implicit (py2-style) import; HEAD's torch class uses
        # `from .w2vEmbReader import ...` — confirm which is intended here.
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        # Overwrite the Embedding weights in place, keeping rows for words
        # absent from the pre-trained vocabulary.
        model.layers[model.emb_index].W.set_value(
            emb_reader.get_emb_matrix_given_vocab(
                vocab, model.layers[model.emb_index].W.get_value()))
        logger.info('  Done')

    return model