Example #1
    def __init__(self,
                 args,
                 emb_index,
                 bidirec,
                 initial_mean_value,
                 overal_maxlen=0):
        super(REGRESSION, self).__init__()
        self.dropout_W = 0.5  # default=0.5
        self.dropout_U = 0.1  # default=0.1
        self.args = args
        cnn_border_mode = 'same'
        if initial_mean_value.ndim == 0:
            initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
        num_outputs = len(initial_mean_value)
        if args.recurrent_unit == 'lstm':
            from torch.nn import LSTM as RNN
        elif args.recurrent_unit == 'gru':
            from torch.nn import GRU as RNN
        elif args.recurrent_unit == 'simple':
            from torch.nn import RNN as RNN

        self.embed = Embedding(args.vocab_size, args.emb_dim)
        outputdim = args.emb_dim
        if args.cnn_dim > 0:
            self.conv = Conv1DWithMasking(outputdim, args.cnn_dim,
                                          args.cnn_window_size, 1,
                                          (args.cnn_window_size - 1) // 2)
            outputdim = args.cnn_dim
        if args.rnn_dim > 0:
            self.rnn = RNN(outputdim,
                           args.rnn_dim,
                           num_layers=1,
                           bias=True,
                           dropout=self.dropout_W,
                           batch_first=True,
                           bidirectional=bidirec)
            outputdim = args.rnn_dim
            if bidirec == 1:
                outputdim = args.rnn_dim * 2
        if args.dropout_prob > 0:
            self.dropout = Dropout(args.dropout_prob)
        if args.aggregation == 'mot':
            self.mot = MeanOverTime()
        elif args.aggregation.startswith('att'):
            self.att = Attention(outputdim,
                                 op=args.aggregation,
                                 activation='tanh',
                                 init_stdev=0.01)

        self.linear = Linear(outputdim, num_outputs)
        # if not args.skip_init_bias:
        # 	self.linear.bias.data = (torch.log(initial_mean_value) - torch.log(1 - initial_mean_value)).float()
        self.emb_index = emb_index
        if args.emb_path:
            from .w2vEmbReader import W2VEmbReader as EmbReader
            logger.info('Initializing lookup table')
            emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
            # NOTE: vocab is not an argument of __init__ in this snippet; it is
            # assumed to be available from the enclosing scope.
            emb_matrix = emb_reader.get_emb_matrix_given_vocab(
                vocab, self.embed.weight.data.numpy())
            self.embed.weight.data.copy_(
                torch.from_numpy(np.asarray(emb_matrix)).float())
        logger.info('  Done')
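
The commented-out initialization of self.linear.bias above sets the final layer's bias to the logit of the mean label, so an untrained model starts out predicting the mean score. A minimal, self-contained PyTorch sketch of that idea (the mean value below is illustrative, not taken from the example):

import numpy as np
import torch

initial_mean_value = np.array([0.6])  # illustrative mean of the normalized scores
linear = torch.nn.Linear(32, len(initial_mean_value))
with torch.no_grad():
    # logit of the mean: log(p) - log(1 - p)
    bias = np.log(initial_mean_value) - np.log(1.0 - initial_mean_value)
    linear.bias.copy_(torch.from_numpy(bias).float())
print(torch.sigmoid(linear.bias))  # ~0.6, the assumed mean
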
Example #2
                    epochs=args.epochs,
                    verbose=1,
                    callbacks=cbks,
                    validation_data=(dev_x, dev_y),
                    shuffle=True)

# Load best model
logger.info('Loading weights from %s', out_dir + '/best_model_weight_cb.h5')
model_weight = create_model(args, train_y.mean(axis=0), vocab)
model_weight.compile(optimizer=optimizer, loss=loss)
model_weight.load_weights(out_dir + '/best_model_weight_cb.h5', by_name=True)
model_weight.model.save(out_dir + '/best_model.h5', overwrite=True)

logger.info('Loading model from %s', out_dir + '/best_model_cb.h5')
model_load = load_model(out_dir + '/best_model_cb.h5',
                        custom_objects={'MeanOverTime': MeanOverTime()})
model_load.save_weights(out_dir + '/best_model_weights.h5')

np.savetxt(out_dir + '/test_x.txt', test_x, fmt='%d')
np.savetxt(out_dir + '/test_y_org.txt', test_y_org, fmt='%.4f')
np.savetxt(out_dir + '/test_y.txt', test_y, fmt='%.4f')

logger.info('Evaluate model_load:')
# score, accu = model_load.evaluate(test_x, test_y, verbose=1)
test_pred = model_load.predict(test_x).squeeze() * 3
qwk = QWK(test_y_org.astype(int),
          np.rint(test_pred).astype(int),
          labels=None,
          weights='quadratic',
          sample_weight=None)
np.savetxt(out_dir + '/test_pred_model_load.txt', test_pred, fmt='%.4f')
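
QWK above is called with the same signature as scikit-learn's cohen_kappa_score (labels, weights, sample_weight), so it is presumably just an alias for that function, e.g.:

from sklearn.metrics import cohen_kappa_score as QWK
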
Example #3
def create_model(args, initial_mean_value, overal_maxlen, vocab):

    ###############################################################################################################################
    ## Recurrence unit type
    #

    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #

    if args.dropout_w > 0:
        dropout_W = args.dropout_w
    else:
        dropout_W = args.dropout_prob  # default=0.5
    if args.dropout_u > 0:
        dropout_U = args.dropout_u
    else:
        dropout_U = args.dropout_prob  # default=0.1

    cnn_border_mode = 'same'

    if args.model_type == 'reg':
        if initial_mean_value.ndim == 0:
            initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
        num_outputs = len(initial_mean_value)
    else:
        num_outputs = initial_mean_value

    ###############################################################################################################################
    ## Initialize embeddings if requested
    #

    if args.emb_path:

        def my_init(shape, name=None):
            from nea.w2vEmbReader import W2VEmbReader as EmbReader
            logger.info('Initializing lookup table')
            emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
            emb_matrix = np.random.random(shape)
            # 			logger.info(' initial matrix \n %s ' % (emb_matrix,))
            emb_matrix = emb_reader.get_emb_matrix_given_vocab(
                vocab, emb_matrix)
            # 			from keras.backend import set_value, get_value
            # 			set_value(model.layers[model.emb_index].W, get_value(emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].W)))
            # 			model.layers[model.emb_index].W.set_value(emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].W.get_value()))
            # 			logger.info(' pre-trained matrix \n %s ' % (emb_matrix,))
            return K.variable(emb_matrix, name=name)

        logger.info(' Use pre-trained embedding')
    else:
        my_init = 'uniform'
        logger.info(' Use default initializing embedding')

    ###############################################################################################################################
    ## Model Stacking
    #

    if args.model_type == 'cls':
        logger.info('Building a CLASSIFICATION model with POOLING')
        dense_activation = 'tanh'
        dense_init = 'glorot_normal'
        final_init = 'glorot_uniform'
        if args.loss == 'cnp':
            final_activation = 'softmax'
        elif args.loss == 'hng':
            final_activation = 'linear'
    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model with POOLING')
        if args.normalize:
            final_activation = 'sigmoid'
            final_init = 'he_normal'
            dense_activation = 'tanh'
            dense_init = 'he_normal'
        else:
            final_activation = 'relu'
            final_init = 'he_uniform'
            dense_activation = 'tanh'
            dense_init = 'he_uniform'
    else:
        raise NotImplementedError

    sequence = Input(shape=(overal_maxlen, ), dtype='int32')
    x = Embedding(len(vocab),
                  args.emb_dim,
                  mask_zero=True,
                  init=my_init,
                  trainable=args.embd_train)(sequence)

    # Conv Layer
    if args.cnn_dim > 0:
        x = Conv1DWithMasking(nb_filter=args.cnn_dim,
                              filter_length=args.cnn_window_size,
                              border_mode=cnn_border_mode,
                              subsample_length=1)(x)

    # RNN Layer
    if args.rnn_dim > 0:
        forwards = RNN(args.rnn_dim,
                       return_sequences=True,
                       dropout_W=dropout_W,
                       dropout_U=dropout_U)(x)
        if args.bi:
            backwards = RNN(args.rnn_dim,
                            return_sequences=True,
                            dropout_W=dropout_W,
                            dropout_U=dropout_U,
                            go_backwards=True)(x)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            if args.bi:
                backwards = Dropout(args.dropout_prob)(backwards)
        # Stack 2 Layers
        if args.rnn_2l or args.rnn_3l:
            if args.bi:
                merged = merge([forwards, backwards],
                               mode='concat',
                               concat_axis=-1)
            else:
                merged = forwards
            forwards = RNN(args.rnn_dim,
                           return_sequences=True,
                           dropout_W=dropout_W,
                           dropout_U=dropout_U)(merged)
            if args.bi:
                backwards = RNN(args.rnn_dim,
                                return_sequences=True,
                                dropout_W=dropout_W,
                                dropout_U=dropout_U,
                                go_backwards=True)(merged)
            if args.dropout_prob > 0:
                forwards = Dropout(args.dropout_prob)(forwards)
                if args.bi:
                    backwards = Dropout(args.dropout_prob)(backwards)
            # Stack 3 Layers
            if args.rnn_3l:
                if args.bi:
                    merged = merge([forwards, backwards],
                                   mode='concat',
                                   concat_axis=-1)
                else:
                    merged = forwards
                forwards = RNN(args.rnn_dim,
                               return_sequences=True,
                               dropout_W=dropout_W,
                               dropout_U=dropout_U)(merged)
                if args.bi:
                    backwards = RNN(args.rnn_dim,
                                    return_sequences=True,
                                    dropout_W=dropout_W,
                                    dropout_U=dropout_U,
                                    go_backwards=True)(merged)
                if args.dropout_prob > 0:
                    forwards = Dropout(args.dropout_prob)(forwards)
                    if args.bi:
                        backwards = Dropout(args.dropout_prob)(backwards)

        if args.aggregation == 'mot':
            forwards = MeanOverTime(mask_zero=True)(forwards)
            if args.bi:
                backwards = MeanOverTime(mask_zero=True)(backwards)
                merged = merge([forwards, backwards],
                               mode='concat',
                               concat_axis=-1)
            else:
                merged = forwards
        else:
            raise NotImplementedError

        # Augmented TF/IDF Layer
        if args.tfidf > 0:
            pca_input = Input(shape=(args.tfidf, ), dtype='float32')
            tfidfmerged = merge([merged, pca_input], mode='concat')
        else:
            tfidfmerged = merged

        # Optional Dense Layer
        if args.dense > 0:
            if args.loss == 'hng':
                tfidfmerged = Dense(
                    num_outputs,
                    init=dense_init,
                    W_regularizer=l2(0.001),
                    activity_regularizer=activity_l2(0.001))(tfidfmerged)
            else:
                tfidfmerged = Dense(num_outputs, init=dense_init)(tfidfmerged)
            if final_activation == 'relu' or final_activation == 'linear':
                tfidfmerged = BatchNormalization()(tfidfmerged)
            tfidfmerged = Activation(dense_activation)(tfidfmerged)
            if args.dropout_prob > 0:
                tfidfmerged = Dropout(args.dropout_prob)(tfidfmerged)

        # Final Prediction Layer
        if args.loss == 'hng':
            tfidfmerged = Dense(
                num_outputs,
                init=final_init,
                W_regularizer=l2(0.001),
                activity_regularizer=activity_l2(0.001))(tfidfmerged)
        else:
            tfidfmerged = Dense(num_outputs, init=final_init)(tfidfmerged)
        if final_activation == 'relu' or final_activation == 'linear':
            tfidfmerged = BatchNormalization()(tfidfmerged)
        predictions = Activation(final_activation)(tfidfmerged)

    else:  # if no rnn
        if args.dropout_prob > 0:
            x = Dropout(args.dropout_prob)(x)
        # Mean over Time
        if args.aggregation == 'mot':
            x = MeanOverTime(mask_zero=True)(x)
        else:
            raise NotImplementedError
        # Augmented TF/IDF Layer
        if args.tfidf > 0:
            pca_input = Input(shape=(args.tfidf, ), dtype='float32')
            z = merge([x, pca_input], mode='concat')
        else:
            z = x
        # Optional Dense Layer
        if args.dense > 0:
            if args.loss == 'hng':
                z = Dense(args.dense,
                          init=dense_init,
                          W_regularizer=l2(0.001),
                          activity_regularizer=activity_l2(0.001))(z)
            else:
                z = Dense(args.dense, init=dense_init)(z)
            if final_activation == 'relu' or final_activation == 'linear':
                z = BatchNormalization()(z)
            z = Activation(dense_activation)(z)
            if args.dropout_prob > 0:
                z = Dropout(args.dropout_prob)(z)
        # Final Prediction Layer
        if args.loss == 'hng':
            z = Dense(num_outputs,
                      init=final_init,
                      W_regularizer=l2(0.001),
                      activity_regularizer=activity_l2(0.001))(z)
        else:
            z = Dense(num_outputs, init=final_init)(z)
        if final_activation == 'relu' or final_activation == 'linear':
            z = BatchNormalization()(z)
        predictions = Activation(final_activation)(z)

    # Model Input/Output
    if args.tfidf > 0:
        model = Model(input=[sequence, pca_input], output=predictions)
    else:
        model = Model(input=sequence, output=predictions)

# 	if args.model_type == 'cls':
# 		logger.info('Building a CLASSIFICATION model')
# 		sequence = Input(shape=(overal_maxlen,), dtype='int32')
# 		x = Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init, trainable=args.embd_train)(sequence)
# 		if args.cnn_dim > 0:
# 			x = Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1)(x)
# 		if args.rnn_dim > 0:
# 			x = RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U)(x)
# 		predictions = Dense(num_outputs, activation='softmax')(x)
# 		model = Model(input=sequence, output=predictions)

# 	elif args.model_type == 'clsp':

# 	elif args.model_type == 'mlp':
# 		logger.info('Building a linear model with POOLING')
# 		sequence = Input(shape=(overal_maxlen,), dtype='int32')
# 		x = Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init, trainable=args.embd_train)(sequence)
# 		if args.dropout_prob > 0:
# 			x = Dropout(args.dropout_prob)(x)
# 		x = MeanOverTime(mask_zero=True)(x)
# 		if args.tfidf > 0:
# 			z = merge([x,pca_input], mode='concat')
# 		else:
# 			z = x
# 		if args.dense > 0:
# 			z = Dense(args.dense, activation='tanh')(z)
# 			if args.dropout_prob > 0:
# 				z = Dropout(args.dropout_prob)(z)
# 		predictions = Dense(num_outputs, activation='softmax')(z)
# 		if args.tfidf > 0:
# 			model = Model(input=[sequence, pca_input], output=predictions)
# 		else:
# 			model = Model(input=sequence, output=predictions)
#
# 	elif args.model_type == 'reg':
# 		logger.info('Building a REGRESSION model')
# 		model = Sequential()
# 		model.add(Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init, trainable=args.embd_train))
# 		if args.cnn_dim > 0:
# 			model.add(Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1))
# 		if args.rnn_dim > 0:
# 			model.add(RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U))
# 		if args.dropout_prob > 0:
# 			model.add(Dropout(args.dropout_prob))
# 		model.add(Dense(num_outputs))
# 		if not args.skip_init_bias:
# 			bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
# 			model.layers[-1].b.set_value(bias_value)
# 		model.add(Activation('sigmoid'))
#
# 	elif args.model_type == 'regp':
# 		logger.info('Building a REGRESSION model with POOLING')
# 		model = Sequential()
# 		model.add(Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init, trainable=args.embd_train))
# 		if args.cnn_dim > 0:
# 			model.add(Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1))
# 		if args.rnn_dim > 0:
# 			model.add(RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U))
# 		if args.dropout_prob > 0:
# 			model.add(Dropout(args.dropout_prob))
# 		if args.aggregation == 'mot':
# 			model.add(MeanOverTime(mask_zero=True))
# 		elif args.aggregation.startswith('att'):
# 			model.add(Attention(op=args.aggregation, activation='tanh', init_stdev=0.01))
# 		model.add(Dense(num_outputs))
# 		if not args.skip_init_bias:
# 			bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
# # 			model.layers[-1].b.set_value(bias_value)
# 			K.set_value(model.layers[-1].b, bias_value)
# 		model.add(Activation('sigmoid'))
#
# 	elif args.model_type == 'breg':
# 		logger.info('Building a BIDIRECTIONAL REGRESSION model')
# 		sequence = Input(shape=(overal_maxlen,), dtype='int32')
# 		output = Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init, trainable=args.embd_train)(sequence)
# 		if args.cnn_dim > 0:
# 			output = Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1)(output)
# 		if args.rnn_dim > 0:
# 			forwards = RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U)(output)
# 			backwards = RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U, go_backwards=True)(output)
# 		if args.dropout_prob > 0:
# 			forwards = Dropout(args.dropout_prob)(forwards)
# 			backwards = Dropout(args.dropout_prob)(backwards)
# 		merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
# 		densed = Dense(num_outputs)(merged)
# 		if not args.skip_init_bias:
# 			raise NotImplementedError
# 		score = Activation('sigmoid')(densed)
# 		model = Model(input=sequence, output=score)
#
# 	elif args.model_type == 'bregp':
# 		logger.info('Building a BIDIRECTIONAL REGRESSION model with POOLING')
# 		sequence = Input(shape=(overal_maxlen,), dtype='int32')
# 		output = Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init, trainable=args.embd_train)(sequence)
# 		if args.cnn_dim > 0:
# 			output = Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1)(output)
# 		if args.rnn_dim > 0:
# 			forwards = RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U)(output)
# 			backwards = RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U, go_backwards=True)(output)
# 		if args.dropout_prob > 0:
# 			forwards = Dropout(args.dropout_prob)(forwards)
# 			backwards = Dropout(args.dropout_prob)(backwards)
# 		forwards_mean = MeanOverTime(mask_zero=True)(forwards)
# 		backwards_mean = MeanOverTime(mask_zero=True)(backwards)
# 		merged = merge([forwards_mean, backwards_mean], mode='concat', concat_axis=-1)
# 		densed = Dense(num_outputs)(merged)
# 		if not args.skip_init_bias:
# 			raise NotImplementedError
# 		score = Activation('sigmoid')(densed)
# 		model = Model(input=sequence, output=score)

    logger.info('  Model Done')
    return model
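
MeanOverTime(mask_zero=True) used above comes from nea.my_layers and is not shown here; it presumably averages the sequence outputs over the non-padded timesteps only. A plain-NumPy sketch of that pooling idea (not the actual layer implementation):

import numpy as np

def mean_over_time(seq_output, token_ids):
    """Average seq_output over timesteps whose token id is non-zero (0 = padding)."""
    mask = (token_ids != 0).astype(seq_output.dtype)            # (batch, time)
    summed = (seq_output * mask[..., None]).sum(axis=1)         # (batch, dim)
    counts = np.maximum(mask.sum(axis=1, keepdims=True), 1.0)   # avoid division by zero
    return summed / counts
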
Example #4
def create_model(args, initial_mean_value, overal_maxlen, vocab):
	
	import keras.backend as K
	from keras.layers.embeddings import Embedding
	from keras.models import Sequential, Model
	from keras.layers.core import Dense, Dropout, Activation
	from nea.my_layers import Attention, MeanOverTime, Conv1DWithMasking
	
	###############################################################################################################################
	## Recurrence unit type
	#

	if args.recurrent_unit == 'lstm':
		from keras.layers.recurrent import LSTM as RNN
	elif args.recurrent_unit == 'gru':
		from keras.layers.recurrent import GRU as RNN
	elif args.recurrent_unit == 'simple':
		from keras.layers.recurrent import SimpleRNN as RNN

	###############################################################################################################################
	## Create Model
	#
	
	dropout_W = 0.5		# default=0.5
	dropout_U = 0.1		# default=0.1
	cnn_border_mode='same'
	if initial_mean_value.ndim == 0:
		initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
	num_outputs = len(initial_mean_value)
	
	if args.model_type == 'cls':
		raise NotImplementedError
	
	elif args.model_type == 'reg':
		logger.info('Building a REGRESSION model')
		model = Sequential()
		model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
		if args.cnn_dim > 0:
			model.add(Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1))
		if args.rnn_dim > 0:
			model.add(RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U))
		if args.dropout_prob > 0:
			model.add(Dropout(args.dropout_prob))
		model.add(Dense(num_outputs))
		if not args.skip_init_bias:
			bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
			model.layers[-1].b.set_value(bias_value)  # Keras 1.x: the Dense bias is stored as .b
		model.add(Activation('sigmoid'))
		model.emb_index = 0
	
	elif args.model_type == 'regp':
		logger.info('Building a REGRESSION model with POOLING')
		model = Sequential()
		model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
		if args.cnn_dim > 0:
			model.add(Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1))
		if args.rnn_dim > 0:
			model.add(RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U))
		if args.dropout_prob > 0:
			model.add(Dropout(args.dropout_prob))
		if args.aggregation == 'mot':
			model.add(MeanOverTime(mask_zero=True))
		elif args.aggregation.startswith('att'):
			model.add(Attention(op=args.aggregation, activation='tanh', init_stdev=0.01))
		model.add(Dense(num_outputs))
		if not args.skip_init_bias:
			bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
			model.layers[-1].b.set_value(bias_value)  # Keras 1.x: the Dense bias is stored as .b
		model.add(Activation('sigmoid'))
		model.emb_index = 0

	elif args.model_type == 'breg':
		logger.info('Building a BIDIRECTIONAL REGRESSION model')
		from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
		model = Sequential()
		sequence = Input(shape=(overal_maxlen,), dtype='int32')
		output = Embedding(args.vocab_size, args.emb_dim, mask_zero=True)(sequence)
		if args.cnn_dim > 0:
			output = Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1)(output)
		if args.rnn_dim > 0:
			forwards = RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U)(output)
			backwards = RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U, go_backwards=True)(output)
		if args.dropout_prob > 0:
			forwards = Dropout(args.dropout_prob)(forwards)
			backwards = Dropout(args.dropout_prob)(backwards)
		merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
		densed = Dense(num_outputs)(merged)
		if not args.skip_init_bias:
			raise NotImplementedError
		score = Activation('sigmoid')(densed)
		model = Model(input=sequence, output=score)
		model.emb_index = 1
	
	elif args.model_type == 'bregp':
		logger.info('Building a BIDIRECTIONAL REGRESSION model with POOLING')
		from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
		model = Sequential()
		sequence = Input(shape=(overal_maxlen,), dtype='int32')
		output = Embedding(args.vocab_size, args.emb_dim, mask_zero=True)(sequence)
		if args.cnn_dim > 0:
			output = Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1)(output)
		if args.rnn_dim > 0:
			forwards = RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U)(output)
			backwards = RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U, go_backwards=True)(output)
		if args.dropout_prob > 0:
			forwards = Dropout(args.dropout_prob)(forwards)
			backwards = Dropout(args.dropout_prob)(backwards)
		forwards_mean = MeanOverTime(mask_zero=True)(forwards)
		backwards_mean = MeanOverTime(mask_zero=True)(backwards)
		merged = merge([forwards_mean, backwards_mean], mode='concat', concat_axis=-1)
		densed = Dense(num_outputs)(merged)
		if not args.skip_init_bias:
			raise NotImplementedError
		score = Activation('sigmoid')(densed)
		model = Model(input=sequence, output=score)
		model.emb_index = 1
	
	logger.info('  Done')
	
	###############################################################################################################################
	## Initialize embeddings if requested
	#

	if args.emb_path:
		from w2vEmbReader import W2VEmbReader as EmbReader
		logger.info('Initializing lookup table')
		emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
		model.layers[model.emb_index].set_weights([emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].get_weights()[0])])
		logger.info('  Done')
	
	return model
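
W2VEmbReader.get_emb_matrix_given_vocab is not shown in these examples; judging by how it is called, it presumably copies pre-trained vectors into the rows of an existing embedding matrix for the vocabulary words it knows and returns that matrix. A hypothetical sketch of that behaviour (in the real reader the vectors come from the file loaded in its constructor; here they are passed in explicitly to keep the sketch self-contained):

import numpy as np

def get_emb_matrix_given_vocab(vocab, emb_matrix, pretrained):
    """vocab: word -> row index; pretrained: word -> vector of length emb_dim."""
    for word, index in vocab.items():
        if word in pretrained:
            emb_matrix[index] = np.asarray(pretrained[word])
    return emb_matrix
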
Example #5
def create_model(args, initial_mean_value, vocab):

    from keras.layers.embeddings import Embedding
    from keras.models import Sequential
    from keras.layers.core import Dense, Activation
    from keras.layers import Bidirectional
    from nea.my_layers import Attention, MeanOverTime, Conv1DWithMasking

    ###############################################################################################################################
    ## Recurrence unit type
    #

    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #

    # dropout_W = 0.5        # default=0.5
    # dropout_U = 0.1        # default=0.1
    cnn_border_mode = 'same'
    if initial_mean_value.ndim == 0:
        print("Dim of initial_mean_value is 0")
        initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
    num_outputs = len(initial_mean_value)
    print("Dim of initial_mean_value is:", num_outputs)

    if args.model_type == 'cls':
        raise NotImplementedError

    logger.info('Building the model:%s' % args.model_type)
    model = Sequential()

    logger.info('    Adding the Embedding layer')
    model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
    model.emb_index = 0
    if args.emb_path:
        from nea.w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('        Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        # ipdb.set_trace()
        # model.layers[model.emb_index].W.set_value(emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].W.get_value()))
        model.layers[model.emb_index].set_weights([
            emb_reader.get_emb_matrix_given_vocab(
                vocab, model.layers[model.emb_index].get_weights()[0])
        ])
        # ipdb.set_trace()
    logger.info('    Done')

    # Add cnn layer
    if args.cnn_dim > 0:
        logger.info('    Adding the CNN layer')
        logger.info('        cnn_dim:%d' % args.cnn_dim)
        logger.info('        window_size:%d' % args.cnn_window_size)
        model.add(
            Conv1DWithMasking(nb_filter=args.cnn_dim,
                              filter_length=args.cnn_window_size,
                              border_mode=cnn_border_mode,
                              subsample_length=1))
        logger.info('    Done')

    # Add LSTM RNN layer
    logger.info('    Adding the LSTM-RNN layer')
    if 'p' in args.model_type:
        layer = RNN(args.rnn_dim, return_sequences=True
                    )  #, dropout_W=dropout_W, dropout_U=dropout_U)
    else:
        layer = RNN(args.rnn_dim, return_sequences=False)
    if 'b' in args.model_type:
        # BiLSTM
        logger.info('        Bidirectional layer created!')
        layer = Bidirectional(layer)
    model.add(layer)
    logger.info('    Done')

    # Add MOT or ATT layer
    if 'p' in args.model_type:
        if args.aggregation == 'mot':
            logger.info('    Adding the MOT layer')
            model.add(MeanOverTime(mask_zero=True))
        elif args.aggregation.startswith('att'):
            logger.info('    Adding the ATT layer')
            model.add(
                Attention(op=args.aggregation,
                          activation='tanh',
                          name='att',
                          init_stdev=0.01))

    model.add(Dense(num_outputs))
    logger.info('    Done')

    model.add(Activation('sigmoid'))
    logger.info('All done!')

    return model
Example #6
def create_model(args, initial_mean_value, overal_maxlen, vocab):

    ###############################################################################################################################
    ## Recurrence unit type
    #

    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #

    if args.dropout_w > 0:
        dropout_W = args.dropout_w
    else:
        dropout_W = args.dropout_prob  # default=0.5
    if args.dropout_u > 0:
        dropout_U = args.dropout_u
    else:
        dropout_U = args.dropout_prob  # default=0.1

    cnn_border_mode = 'same'

    if args.model_type == 'reg':
        if initial_mean_value.ndim == 0:
            initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
        num_outputs = len(initial_mean_value)
    else:
        num_outputs = initial_mean_value

    ###############################################################################################################################
    ## Initialize embeddings if requested
    #

    if args.emb_path:

        def my_init(shape, name=None):
            from nea.w2vEmbReader import W2VEmbReader as EmbReader
            logger.info('Initializing lookup table')
            emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
            emb_matrix = np.random.random(shape)
            # 			logger.info(' initial matrix \n %s ' % (emb_matrix,))
            emb_matrix = emb_reader.get_emb_matrix_given_vocab(
                vocab, emb_matrix)
            # 			from keras.backend import set_value, get_value
            # 			set_value(model.layers[model.emb_index].W, get_value(emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].W)))
            # 			model.layers[model.emb_index].W.set_value(emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].W.get_value()))
            # 			logger.info(' pre-trained matrix \n %s ' % (emb_matrix,))
            return K.variable(emb_matrix, name=name)

        logger.info(' Use pre-trained embedding')
    else:
        my_init = 'uniform'
        logger.info(' Use default initializing embedding')

    ###############################################################################################################################
    ## Model Stacking
    #

    if args.model_type == 'cls':
        logger.info('Building a CLASSIFICATION model with POOLING')
        dense_activation = 'tanh'
        dense_init = 'glorot_normal'
        if args.loss == 'cnp':
            final_activation = 'softmax'
            final_init = 'glorot_uniform'
        elif args.loss == 'hng':
            final_activation = 'linear'
            final_init = 'glorot_uniform'
    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model with POOLING')
        dense_activation = 'tanh'
        dense_init = 'he_normal'
        if args.normalize:
            final_activation = 'sigmoid'
            final_init = 'he_normal'
        else:
            final_activation = 'relu'
            final_init = 'he_uniform'
    else:
        raise NotImplementedError

    sequence = Input(shape=(overal_maxlen, ), dtype='int32')
    x = Embedding(len(vocab),
                  args.emb_dim,
                  mask_zero=True,
                  init=my_init,
                  trainable=args.embd_train)(sequence)

    # Conv Layer
    if args.cnn_dim > 0:
        x = Conv1DWithMasking(nb_filter=args.cnn_dim,
                              filter_length=args.cnn_window_size,
                              border_mode=cnn_border_mode,
                              subsample_length=1)(x)

    # RNN Layer
    if args.rnn_dim > 0:
        rnn_layer = RNN(args.rnn_dim,
                        return_sequences=True,
                        consume_less=args.rnn_opt,
                        dropout_W=dropout_W,
                        dropout_U=dropout_U)
        if args.bi:
            rnn_layer = Bidirectional(rnn_layer)
        x = rnn_layer(x)
        if args.dropout_prob > 0:
            x = Dropout(args.dropout_prob)(x)

        # Stack 2 Layers
        if args.rnn_2l or args.rnn_3l:
            rnn_layer2 = RNN(args.rnn_dim,
                             return_sequences=True,
                             consume_less=args.rnn_opt,
                             dropout_W=dropout_W,
                             dropout_U=dropout_U)
            if args.bi:
                rnn_layer2 = Bidirectional(rnn_layer2)
            x = rnn_layer2(x)
            if args.dropout_prob > 0:
                x = Dropout(args.dropout_prob)(x)
            # Stack 3 Layers
            if args.rnn_3l:
                rnn_layer3 = RNN(args.rnn_dim,
                                 return_sequences=True,
                                 consume_less=args.rnn_opt,
                                 dropout_W=dropout_W,
                                 dropout_U=dropout_U)
                if args.bi:
                    rnn_layer3 = Bidirectional(rnn_layer3)
                x = rnn_layer3(x)
                if args.dropout_prob > 0:
                    x = Dropout(args.dropout_prob)(x)

    # Mean over Time
    if args.aggregation == 'mot':
        x = MeanOverTime(mask_zero=True)(x)
    elif args.aggregation == 'att':
        attention_rnn = RNN(args.rnn_dim,
                            return_sequences=False,
                            consume_less=args.rnn_opt,
                            dropout_W=dropout_W,
                            dropout_U=dropout_U)
        attention_rnn = Attention(attention_rnn)
        x = attention_rnn(x)
    else:
        raise NotImplementedError

    # Augmented TF/IDF Layer
    if args.tfidf > 0:
        pca_input = Input(shape=(args.tfidf, ), dtype='float32')
        merged = merge([x, pca_input], mode='concat')
    else:
        merged = x

    # Augmented Numerical Features
    if args.features:
        ftr_input = Input(shape=(13, ), dtype='float32')
        merged = merge([merged, ftr_input], mode='concat')

    # Optional Dense Layer
    if args.dense > 0:
        if args.loss == 'hng':
            merged = DenseWithMasking(num_outputs,
                                      init=dense_init,
                                      W_regularizer=l2(0.001),
                                      activity_regularizer=l2(0.001))(merged)
        else:
            merged = DenseWithMasking(num_outputs, init=dense_init)(merged)
        if final_activation == 'relu' or final_activation == 'linear':
            merged = BatchNormalization()(merged)
        merged = Activation(dense_activation)(merged)
        if args.dropout_prob > 0:
            merged = Dropout(args.dropout_prob)(merged)

    # Final Prediction Layer
    if args.loss == 'hng':
        merged = DenseWithMasking(num_outputs,
                                  init=final_init,
                                  W_regularizer=l2(0.001),
                                  activity_regularizer=l2(0.001))(merged)
    else:
        merged = DenseWithMasking(num_outputs, init=final_init)(merged)
    if final_activation == 'relu' or final_activation == 'linear':
        merged = BatchNormalization()(merged)
    predictions = Activation(final_activation)(merged)

    # Model Input/Output
    model_input = [
        sequence,
    ]
    if args.tfidf > 0:
        model_input.append(pca_input)
    if args.features:
        model_input.append(ftr_input)

    model = Model(input=model_input, output=predictions)

    logger.info('  Model Done')
    return model
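
merge([...], mode='concat') used throughout these examples is the Keras 1.x functional API; under Keras 2.x the equivalent would be keras.layers.concatenate, roughly (input shapes below are illustrative only):

from keras.layers import Input, concatenate

x = Input(shape=(64,))
pca_input = Input(shape=(10,))
merged = concatenate([x, pca_input], axis=-1)  # shape (None, 74)
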
Example #7
def create_model(args, initial_mean_value, overal_maxlen, vocab):

    import keras.backend as K
    from keras.layers.embeddings import Embedding
    from keras.models import Sequential, Model
    from keras.layers.core import Dense, Dropout, Activation
    from nea.my_layers import Attention, MeanOverTime, Conv1DWithMasking

    ###############################################################################################################################
    ## Recurrence unit type
    #

    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #

    dropout_W = 0.5  # default=0.5
    dropout_U = 0.1  # default=0.1
    cnn_border_mode = 'same'
    if initial_mean_value.ndim == 0:  #expand the dims
        initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
    num_outputs = len(initial_mean_value)  # number of score outputs to predict

    if args.model_type == 'cls':
        raise NotImplementedError

    #embedding-->cnn-->rnn(return_sequence=false)-->dropout-->dense-->sigmoid
    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model')
        model = Sequential()
        # mask_zero decides whether the input value 0 is treated as 'padding' to be ignored;
        # if True, every later layer must support masking, otherwise an exception is raised.
        # If True, index 0 is unavailable in the vocabulary, and input_dim should be |vocabulary| + 1.
        # The Input layer is omitted here because input_length has a default value.
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:  #border_mode==padding?? subsample_length==pooling?? where is the activation??
            model.add(
                Conv1DWithMasking(nb_filter=args.cnn_dim,
                                  filter_length=args.cnn_window_size,
                                  border_mode=cnn_border_mode,
                                  subsample_length=1))
        if args.rnn_dim > 0:  # return_sequences=False returns only the last state
            model.add(
                RNN(args.rnn_dim,
                    return_sequences=False,
                    dropout_W=dropout_W,
                    dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:  # initialize the bias of the last layer
            bias_value = (np.log(initial_mean_value) -
                          np.log(1 - initial_mean_value)).astype(K.floatx())
            model.layers[-1].b.set_value(bias_value)
        model.add(Activation('sigmoid'))  # output range is (0, 1)
        # record the embedding layer index so it can later be initialized with pre-trained
        # word vectors; all layers of the model are stored in model.layers
        model.emb_index = 0

    #embedding-->cnn-->rnn(return_sequence=true)-->dropout-->MeanoverTime or Attention(mean or sum)-->Dense-->sigmoid
    elif args.model_type == 'regp':
        logger.info('Building a REGRESSION model with POOLING')
        model = Sequential()
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            model.add(
                Conv1DWithMasking(nb_filter=args.cnn_dim,
                                  filter_length=args.cnn_window_size,
                                  border_mode=cnn_border_mode,
                                  subsample_length=1))
        if args.rnn_dim > 0:
            model.add(
                RNN(args.rnn_dim,
                    return_sequences=True,
                    dropout_W=dropout_W,
                    dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        if args.aggregation == 'mot':
            model.add(MeanOverTime(mask_zero=True))
        elif args.aggregation.startswith('att'):
            model.add(
                Attention(op=args.aggregation,
                          activation='tanh',
                          init_stdev=0.01))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            bias_value = (np.log(initial_mean_value) -
                          np.log(1 - initial_mean_value)).astype(K.floatx())
            model.layers[-1].b.set_value(bias_value)
        model.add(Activation('sigmoid'))
        model.emb_index = 0
    #embedding-->cnn-->birnn(return_sequence=false)-->dropout-->merge(concat the forRnn&backRnn)-->dense-->sigmoid
    elif args.model_type == 'breg':
        logger.info('Building a BIDIRECTIONAL REGRESSION model')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        model = Sequential()  # this line is probably redundant
        sequence = Input(shape=(overal_maxlen, ), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim,
                           mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim,
                           return_sequences=False,
                           dropout_W=dropout_W,
                           dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim,
                            return_sequences=False,
                            dropout_W=dropout_W,
                            dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(input=sequence, output=score)
        model.emb_index = 1
    #embedding-->cnn-->biRnn(return_sequence=true)-->dropout-->meanOverTime-->merge(concat)-->dense-->sigmoid
    elif args.model_type == 'bregp':
        logger.info('Building a BIDIRECTIONAL REGRESSION model with POOLING')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        model = Sequential()  # redundant
        sequence = Input(shape=(overal_maxlen, ), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim,
                           mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim,
                           return_sequences=True,
                           dropout_W=dropout_W,
                           dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim,
                            return_sequences=True,
                            dropout_W=dropout_W,
                            dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        forwards_mean = MeanOverTime(mask_zero=True)(forwards)
        backwards_mean = MeanOverTime(mask_zero=True)(backwards)
        merged = merge([forwards_mean, backwards_mean],
                       mode='concat',
                       concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(input=sequence, output=score)
        model.emb_index = 1

    logger.info('  Done')

    ###############################################################################################################################
    ## Initialize embeddings if requested
    #

    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        model.layers[model.emb_index].W.set_value(
            emb_reader.get_emb_matrix_given_vocab(
                vocab, model.layers[model.emb_index].W.get_value()))
        logger.info('  Done')

    return model