Example #1
def get_model(data: SupervisedData, embed_hidden_size):
    sentence = Input(shape=data.input_shape[SENTENCE], dtype='int32')
    # noinspection PyUnresolvedReferences
    encoded_sentence = Embedding(data.vocab_size, embed_hidden_size)(sentence)
    encoded_sentence = Dropout(0.3)(encoded_sentence)

    question = Input(shape=data.input_shape[QUERY], dtype='int32')
    # noinspection PyUnresolvedReferences
    encoded_question = Embedding(data.vocab_size, embed_hidden_size)(question)
    encoded_question = Dropout(0.3)(encoded_question)
    encoded_question = RNN(embed_hidden_size)(encoded_question)
    # noinspection PyUnresolvedReferences
    encoded_question = RepeatVector(data.sentence_maxlen)(encoded_question)

    merged = add([encoded_sentence, encoded_question])
    merged = RNN(embed_hidden_size)(merged)
    merged = Dropout(0.3)(merged)
    # noinspection PyUnresolvedReferences
    preds = Dense(data.vocab_size, activation='softmax')(merged)

    model = Model([sentence, question], preds)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    return model
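The snippet above omits its imports and helper definitions. A minimal sketch of the setup it appears to assume follows; SupervisedData, SENTENCE, and QUERY are project-specific names, and RNN is assumed to be an alias for a concrete recurrent layer such as LSTM.

# Hypothetical setup for Example #1 (not part of the original snippet).
from keras.layers import Input, Embedding, Dropout, Dense, RepeatVector, add
from keras.layers import LSTM as RNN   # assumption: RNN aliases a concrete recurrent layer
from keras.models import Model

SENTENCE, QUERY = 'sentence', 'query'  # assumed keys into data.input_shape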
Example #2
def build_basic_model():
    # Encoder
    encoder_inputs = Input(shape=(max_encoder_seq_length, ),
                           name='encoder_inputs')
    encoder_embedding = embedding_layer(encoder_inputs)
    rnn_encoded, *encoder_states = LSTM(latent_dim,
                                        return_sequences=True,
                                        return_state=True,
                                        name='encoder_lstm')(encoder_embedding)

    decoder_inputs = Input(shape=(None, ), name='decoder_inputs')
    decoder_embedding = target_emedding_layer(decoder_inputs)

    attentionCell = AttentionRNNCell(units=latent_dim,
                                     encoder_ts=max_encoder_seq_length,
                                     encoder_latdim=latent_dim)
    Decoder_layer = RNN(attentionCell,
                        return_sequences=True,
                        return_state=True)
    decoder_outputs, decoder_states = Decoder_layer(decoder_embedding,
                                                    encoder_states[0],
                                                    constants=rnn_encoded)

    decoder_dense = Dense(num_decoder_tokens,
                          activation='softmax',
                          name='decoder_dense')
    decoder_outputs = decoder_dense(decoder_outputs)

    basic_model = Model(inputs=[encoder_inputs, decoder_inputs],
                        outputs=[decoder_outputs])
    basic_model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
    return basic_model
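This example likewise depends on module-level names defined elsewhere; here RNN is the generic keras.layers.RNN wrapper, which accepts a custom cell plus constants for attention over the encoder outputs. A sketch of the assumed context, with all values purely illustrative:

# Hypothetical context for Example #2 (sizes, shapes, and layers are illustrative only).
from keras.layers import Input, Embedding, Dense, LSTM, RNN
from keras.models import Model

latent_dim = 256                      # assumed hidden size
max_encoder_seq_length = 30           # assumed source sequence length
num_decoder_tokens = 1000             # assumed target vocabulary size
embedding_layer = Embedding(5000, latent_dim)                        # assumed source embedding
target_emedding_layer = Embedding(num_decoder_tokens, latent_dim)    # name kept as in the snippet
# AttentionRNNCell is a custom attention cell defined in the original project (not shown here).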
Example #3
def lmu_layer(return_sequences=False, **kwargs):
    return RNN(LMUCell(units=6,
                       order=6,
                       theta=6,
                       input_encoders_initializer=Constant(1),
                       hidden_encoders_initializer=Constant(0),
                       memory_encoders_initializer=Constant(0),
                       input_kernel_initializer=Constant(0),
                       hidden_kernel_initializer=Constant(0),
                       memory_kernel_initializer='glorot_normal',
                      ),
               return_sequences=return_sequences,
               **kwargs)
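LMUCell and Constant are not imported in the snippet; they presumably come from a Legendre Memory Unit (LMU) implementation and keras.initializers respectively. A hedged usage sketch:

# Illustrative usage of lmu_layer; LMUCell is assumed to come from an LMU package
# (e.g. the reference `lmu` implementation) and Constant from keras.initializers.
from keras.layers import Input, Dense, RNN
from keras.initializers import Constant
from keras.models import Model

inp = Input(shape=(100, 1))                 # assumed: sequences of length 100 with 1 feature
x = lmu_layer(return_sequences=False)(inp)
out = Dense(10, activation='softmax')(x)    # assumed 10-class head
model = Model(inp, out)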
Example #4
    def lstm_model(self):

        inp = Input(shape=(self.maxlen, ))
        if self.embeddings is not None:
            self.vocab_size = self.embeddings.embedding_matrix.shape[0]
            x = Embedding(self.vocab_size,
                          c.MODEL.embed_size,
                          weights=[self.embeddings.embedding_matrix],
                          trainable=False)(inp)
        else:
            x = Embedding(self.vocab_size, c.MODEL.embed_size)(inp)
        for i in range(c.MODEL.n_layers):
            if c.MODEL.seq_type == 'lstm':
                x = LSTM(c.MODEL.embed_size,
                         return_sequences=True,
                         dropout=0.1,
                         recurrent_dropout=0.1)(x)
            elif c.MODEL.seq_type == 'gru':
                x = GRU(c.MODEL.embed_size,
                        return_sequences=True,
                        dropout=0.1,
                        recurrent_dropout=0.1)(x)
            else:
                x = RNN(c.MODEL.embed_size,
                        return_sequences=True,
                        dropout=0.1,
                        recurrent_dropout=0.1)(x)
        if c.MODEL.attention:
            x = AttentionWeightedAverage()(x)
        else:
            x = GlobalMaxPool1D()(x)
        x = Dense(c.MODEL.embed_size, activation="relu")(x)
        x = Dropout(0.1)(x)
        x = Dense(c.MODEL.nclasses, activation="sigmoid")(x)

        #if self.ngpus>1:
        #    with tf.device("/cpu:0"):
        #        model = Model(inputs=inp, outputs=x)
        #else:
        #    model = Model(inputs=inp, outputs=x)

        model = Model(inputs=inp, outputs=x)
        adam = optimizers.Adam(lr=c.TRAINING.l_rate)
        model.compile(loss='binary_crossentropy',
                      optimizer=adam,
                      metrics=['accuracy'])
        return model
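Here `c` is a project-specific configuration object and AttentionWeightedAverage is a custom pooling layer. Note that in Keras 2 the generic keras.layers.RNN expects a cell object rather than a unit count, so the final else branch only works if RNN is aliased to a concrete layer in the original module; a sketch of that assumption:

# Assumed imports/aliases for Example #4 (illustrative, not from the original file).
from keras.layers import Input, Embedding, LSTM, GRU, Dense, Dropout, GlobalMaxPool1D
from keras.layers import SimpleRNN as RNN   # assumption: the plain-RNN fallback
from keras.models import Model
from keras import optimizers
# `c` holds configuration (c.MODEL.embed_size, c.MODEL.n_layers, c.TRAINING.l_rate, ...)
# and AttentionWeightedAverage is a custom attention-pooling layer from the project.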
Example #5
def create_model(args, initial_mean_value, overal_maxlen, vocab):

    ###############################################################################################################################
    ## Recurrence unit type
    #

    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #

    if args.dropout_w > 0:
        dropout_W = args.dropout_w
    else:
        dropout_W = args.dropout_prob  # default=0.5
    if args.dropout_u > 0:
        dropout_U = args.dropout_u
    else:
        dropout_U = args.dropout_prob  # default=0.1

    cnn_border_mode = 'same'

    if args.model_type == 'reg':
        if initial_mean_value.ndim == 0:
            initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
        num_outputs = len(initial_mean_value)
    else:
        num_outputs = initial_mean_value

    ###############################################################################################################################
    ## Initialize embeddings if requested
    #

    if args.emb_path:

        def my_init(shape, name=None):
            from nea.w2vEmbReader import W2VEmbReader as EmbReader
            logger.info('Initializing lookup table')
            emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
            emb_matrix = np.random.random(shape)
            # 			logger.info(' initial matrix \n %s ' % (emb_matrix,))
            emb_matrix = emb_reader.get_emb_matrix_given_vocab(
                vocab, emb_matrix)
            # 			from keras.backend import set_value, get_value
            # 			set_value(model.layers[model.emb_index].W, get_value(emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].W)))
            # 			model.layers[model.emb_index].W.set_value(emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].W.get_value()))
            # 			logger.info(' pre-trained matrix \n %s ' % (emb_matrix,))
            return K.variable(emb_matrix, name=name)

        logger.info(' Use pre-trained embedding')
    else:
        my_init = 'uniform'
        logger.info(' Use default initializing embedding')

    ###############################################################################################################################
    ## Model Stacking
    #

    if args.model_type == 'cls':
        logger.info('Building a CLASSIFICATION model with POOLING')
        dense_activation = 'tanh'
        dense_init = 'glorot_normal'
        final_init = 'glorot_uniform'
        if args.loss == 'cnp':
            final_activation = 'softmax'
        elif args.loss == 'hng':
            final_activation = 'linear'
    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model with POOLING')
        if args.normalize:
            final_activation = 'sigmoid'
            final_init = 'he_normal'
            dense_activation = 'tanh'
            dense_init = 'he_normal'
        else:
            final_activation = 'relu'
            final_init = 'he_uniform'
            dense_activation = 'tanh'
            dense_init = 'he_uniform'
    else:
        raise NotImplementedError

    sequence = Input(shape=(overal_maxlen, ), dtype='int32')
    x = Embedding(len(vocab),
                  args.emb_dim,
                  mask_zero=True,
                  init=my_init,
                  trainable=args.embd_train)(sequence)

    # Conv Layer
    if args.cnn_dim > 0:
        x = Conv1DWithMasking(nb_filter=args.cnn_dim,
                              filter_length=args.cnn_window_size,
                              border_mode=cnn_border_mode,
                              subsample_length=1)(x)

    # RNN Layer
    if args.rnn_dim > 0:
        forwards = RNN(args.rnn_dim,
                       return_sequences=True,
                       dropout_W=dropout_W,
                       dropout_U=dropout_U)(x)
        if args.bi:
            backwards = RNN(args.rnn_dim,
                            return_sequences=True,
                            dropout_W=dropout_W,
                            dropout_U=dropout_U,
                            go_backwards=True)(x)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            if args.bi:
                backwards = Dropout(args.dropout_prob)(backwards)
        # Stack 2 Layers
        if args.rnn_2l or args.rnn_3l:
            if args.bi:
                merged = merge([forwards, backwards],
                               mode='concat',
                               concat_axis=-1)
            else:
                merged = forwards
            forwards = RNN(args.rnn_dim,
                           return_sequences=True,
                           dropout_W=dropout_W,
                           dropout_U=dropout_U)(merged)
            if args.bi:
                backwards = RNN(args.rnn_dim,
                                return_sequences=True,
                                dropout_W=dropout_W,
                                dropout_U=dropout_U,
                                go_backwards=True)(merged)
            if args.dropout_prob > 0:
                forwards = Dropout(args.dropout_prob)(forwards)
                if args.bi:
                    backwards = Dropout(args.dropout_prob)(backwards)
            # Stack 3 Layers
            if args.rnn_3l:
                if args.bi:
                    merged = merge([forwards, backwards],
                                   mode='concat',
                                   concat_axis=-1)
                else:
                    merged = forwards
                forwards = RNN(args.rnn_dim,
                               return_sequences=True,
                               dropout_W=dropout_W,
                               dropout_U=dropout_U)(merged)
                if args.bi:
                    backwards = RNN(args.rnn_dim,
                                    return_sequences=True,
                                    dropout_W=dropout_W,
                                    dropout_U=dropout_U,
                                    go_backwards=True)(merged)
                if args.dropout_prob > 0:
                    forwards = Dropout(args.dropout_prob)(forwards)
                    if args.bi:
                        backwards = Dropout(args.dropout_prob)(backwards)

        if args.aggregation == 'mot':
            forwards = MeanOverTime(mask_zero=True)(forwards)
            if args.bi:
                backwards = MeanOverTime(mask_zero=True)(backwards)
                merged = merge([forwards, backwards],
                               mode='concat',
                               concat_axis=-1)
            else:
                merged = forwards
        else:
            raise NotImplementedError

        # Augmented TF/IDF Layer
        if args.tfidf > 0:
            pca_input = Input(shape=(args.tfidf, ), dtype='float32')
            tfidfmerged = merge([merged, pca_input], mode='concat')
        else:
            tfidfmerged = merged

        # Optional Dense Layer
        if args.dense > 0:
            if args.loss == 'hng':
                tfidfmerged = Dense(
                    num_outputs,
                    init=dense_init,
                    W_regularizer=l2(0.001),
                    activity_regularizer=activity_l2(0.001))(tfidfmerged)
            else:
                tfidfmerged = Dense(num_outputs, init=dense_init)(tfidfmerged)
            if final_activation == 'relu' or final_activation == 'linear':
                tfidfmerged = BatchNormalization()(tfidfmerged)
            tfidfmerged = Activation(dense_activation)(tfidfmerged)
            if args.dropout_prob > 0:
                tfidfmerged = Dropout(args.dropout_prob)(tfidfmerged)

        # Final Prediction Layer
        if args.loss == 'hng':
            tfidfmerged = Dense(
                num_outputs,
                init=final_init,
                W_regularizer=l2(0.001),
                activity_regularizer=activity_l2(0.001))(tfidfmerged)
        else:
            tfidfmerged = Dense(num_outputs, init=final_init)(tfidfmerged)
        if final_activation == 'relu' or final_activation == 'linear':
            tfidfmerged = BatchNormalization()(tfidfmerged)
        predictions = Activation(final_activation)(tfidfmerged)

    else:  # if no rnn
        if args.dropout_prob > 0:
            x = Dropout(args.dropout_prob)(x)
        # Mean over Time
        if args.aggregation == 'mot':
            x = MeanOverTime(mask_zero=True)(x)
        else:
            raise NotImplementedError
        # Augmented TF/IDF Layer
        if args.tfidf > 0:
            pca_input = Input(shape=(args.tfidf, ), dtype='float32')
            z = merge([x, pca_input], mode='concat')
        else:
            z = x
        # Optional Dense Layer
        if args.dense > 0:
            if args.loss == 'hng':
                z = Dense(args.dense,
                          init=dense_init,
                          W_regularizer=l2(0.001),
                          activity_regularizer=activity_l2(0.001))(z)
            else:
                z = Dense(args.dense, init=dense_init)(z)
            if final_activation == 'relu' or final_activation == 'linear':
                z = BatchNormalization()(z)
            z = Activation(dense_activation)(z)
            if args.dropout_prob > 0:
                z = Dropout(args.dropout_prob)(z)
        # Final Prediction Layer
        if args.loss == 'hng':
            z = Dense(num_outputs,
                      init=final_init,
                      W_regularizer=l2(0.001),
                      activity_regularizer=activity_l2(0.001))(z)
        else:
            z = Dense(args.dense, init=dense_init)(z)
        if final_activation == 'relu' or final_activation == 'linear':
            z = BatchNormalization()(z)
        predictions = Activation(final_activation)(z)

    # Model Input/Output
    if args.tfidf > 0:
        model = Model(input=[sequence, pca_input], output=predictions)
    else:
        model = Model(input=sequence, output=predictions)

# 	if args.model_type == 'cls':
# 		logger.info('Building a CLASSIFICATION model')
# 		sequence = Input(shape=(overal_maxlen,), dtype='int32')
# 		x = Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init, trainable=args.embd_train)(sequence)
# 		if args.cnn_dim > 0:
# 			x = Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1)(x)
# 		if args.rnn_dim > 0:
# 			x = RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U)(x)
# 		predictions = Dense(num_outputs, activation='softmax')(x)
# 		model = Model(input=sequence, output=predictions)

# 	elif args.model_type == 'clsp':

# 	elif args.model_type == 'mlp':
# 		logger.info('Building a linear model with POOLING')
# 		sequence = Input(shape=(overal_maxlen,), dtype='int32')
# 		x = Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init, trainable=args.embd_train)(sequence)
# 		if args.dropout_prob > 0:
# 			x = Dropout(args.dropout_prob)(x)
# 		x = MeanOverTime(mask_zero=True)(x)
# 		if args.tfidf > 0:
# 			z = merge([x,pca_input], mode='concat')
# 		else:
# 			z = x
# 		if args.dense > 0:
# 			z = Dense(args.dense, activation='tanh')(z)
# 			if args.dropout_prob > 0:
# 				z = Dropout(args.dropout_prob)(z)
# 		predictions = Dense(num_outputs, activation='softmax')(z)
# 		if args.tfidf > 0:
# 			model = Model(input=[sequence, pca_input], output=predictions)
# 		else:
# 			model = Model(input=sequence, output=predictions)
#
# 	elif args.model_type == 'reg':
# 		logger.info('Building a REGRESSION model')
# 		model = Sequential()
# 		model.add(Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init, trainable=args.embd_train))
# 		if args.cnn_dim > 0:
# 			model.add(Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1))
# 		if args.rnn_dim > 0:
# 			model.add(RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U))
# 		if args.dropout_prob > 0:
# 			model.add(Dropout(args.dropout_prob))
# 		model.add(Dense(num_outputs))
# 		if not args.skip_init_bias:
# 			bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
# 			model.layers[-1].b.set_value(bias_value)
# 		model.add(Activation('sigmoid'))
#
# 	elif args.model_type == 'regp':
# 		logger.info('Building a REGRESSION model with POOLING')
# 		model = Sequential()
# 		model.add(Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init, trainable=args.embd_train))
# 		if args.cnn_dim > 0:
# 			model.add(Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1))
# 		if args.rnn_dim > 0:
# 			model.add(RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U))
# 		if args.dropout_prob > 0:
# 			model.add(Dropout(args.dropout_prob))
# 		if args.aggregation == 'mot':
# 			model.add(MeanOverTime(mask_zero=True))
# 		elif args.aggregation.startswith('att'):
# 			model.add(Attention(op=args.aggregation, activation='tanh', init_stdev=0.01))
# 		model.add(Dense(num_outputs))
# 		if not args.skip_init_bias:
# 			bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
# # 			model.layers[-1].b.set_value(bias_value)
# 			K.set_value(model.layers[-1].b, bias_value)
# 		model.add(Activation('sigmoid'))
#
# 	elif args.model_type == 'breg':
# 		logger.info('Building a BIDIRECTIONAL REGRESSION model')
# 		sequence = Input(shape=(overal_maxlen,), dtype='int32')
# 		output = Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init, trainable=args.embd_train)(sequence)
# 		if args.cnn_dim > 0:
# 			output = Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1)(output)
# 		if args.rnn_dim > 0:
# 			forwards = RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U)(output)
# 			backwards = RNN(args.rnn_dim, return_sequences=False, dropout_W=dropout_W, dropout_U=dropout_U, go_backwards=True)(output)
# 		if args.dropout_prob > 0:
# 			forwards = Dropout(args.dropout_prob)(forwards)
# 			backwards = Dropout(args.dropout_prob)(backwards)
# 		merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
# 		densed = Dense(num_outputs)(merged)
# 		if not args.skip_init_bias:
# 			raise NotImplementedError
# 		score = Activation('sigmoid')(densed)
# 		model = Model(input=sequence, output=score)
#
# 	elif args.model_type == 'bregp':
# 		logger.info('Building a BIDIRECTIONAL REGRESSION model with POOLING')
# 		sequence = Input(shape=(overal_maxlen,), dtype='int32')
# 		output = Embedding(len(vocab), args.emb_dim, mask_zero=True, init=my_init, trainable=args.embd_train)(sequence)
# 		if args.cnn_dim > 0:
# 			output = Conv1DWithMasking(nb_filter=args.cnn_dim, filter_length=args.cnn_window_size, border_mode=cnn_border_mode, subsample_length=1)(output)
# 		if args.rnn_dim > 0:
# 			forwards = RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U)(output)
# 			backwards = RNN(args.rnn_dim, return_sequences=True, dropout_W=dropout_W, dropout_U=dropout_U, go_backwards=True)(output)
# 		if args.dropout_prob > 0:
# 			forwards = Dropout(args.dropout_prob)(forwards)
# 			backwards = Dropout(args.dropout_prob)(backwards)
# 		forwards_mean = MeanOverTime(mask_zero=True)(forwards)
# 		backwards_mean = MeanOverTime(mask_zero=True)(backwards)
# 		merged = merge([forwards_mean, backwards_mean], mode='concat', concat_axis=-1)
# 		densed = Dense(num_outputs)(merged)
# 		if not args.skip_init_bias:
# 			raise NotImplementedError
# 		score = Activation('sigmoid')(densed)
# 		model = Model(input=sequence, output=score)

    logger.info('  Model Done')
    return model
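This example (including the commented-out variants) is written against the Keras 1.x API: merge(...), init=, W_regularizer=, activity_l2, border_mode, and Model(input=..., output=...). For orientation, the rough Keras 2 equivalents of the recurring idioms would be:

# Rough Keras 2 equivalents of the Keras 1 idioms used above (illustrative only).
from keras.layers import concatenate, Dense
from keras.regularizers import l2

merged = concatenate([forwards, backwards], axis=-1)   # was merge(..., mode='concat', concat_axis=-1)
dense = Dense(num_outputs,
              kernel_initializer=dense_init,            # was init=dense_init
              kernel_regularizer=l2(0.001),             # was W_regularizer=l2(0.001)
              activity_regularizer=l2(0.001))           # was activity_regularizer=activity_l2(0.001)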
Example #6
def create_model(args, initial_mean_value, overal_maxlen, vocab):
    import keras
    import keras.backend as K
    from keras.layers.embeddings import Embedding
    from keras.models import Sequential, Model
    from keras.layers.core import Dense, Dropout, Activation
    from .my_layers import Attention, MeanOverTime, Conv1DWithMasking

    ###############################################################################################################################
    ## Recurrence unit type
    #

    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #

    dropout_W = 0.5  # default=0.5
    dropout_U = 0.1  # default=0.1
    cnn_border_mode = 'same'
    if initial_mean_value.ndim == 0:
        initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
    num_outputs = len(initial_mean_value)

    if args.model_type == 'cls':
        raise NotImplementedError

    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model')
        model = Sequential()
        # model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True, trainable=False))
        model.add(
            Embedding(args.vocab_size,
                      args.emb_dim,
                      mask_zero=True,
                      trainable=True))
        if args.cnn_dim > 0:
            model.add(
                Conv1DWithMasking(nb_filter=args.cnn_dim,
                                  filter_length=args.cnn_window_size,
                                  border_mode=cnn_border_mode,
                                  subsample_length=1))
        if args.rnn_dim > 0:
            model.add(
                RNN(args.rnn_dim,
                    return_sequences=False,
                    dropout_W=dropout_W,
                    dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        model.add(Dense(num_outputs))
        # if not args.skip_init_bias:
        #     bias_value = (np.log(initial_mean_value) - np.log(1 - initial_mean_value)).astype(K.floatx())
        # model.layers[-1].b.set_value(bias_value)
        # model.layers[-1].bias_initializer = keras.initializers.Constant(value=bias_value)
        model.add(Activation('sigmoid'))
        model.emb_index = 0

    elif args.model_type == 'regp':
        logger.info('Building a REGRESSION model with POOLING')
        model = Sequential()
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            model.add(
                Conv1DWithMasking(nb_filter=args.cnn_dim,
                                  filter_length=args.cnn_window_size,
                                  border_mode=cnn_border_mode,
                                  subsample_length=1))
        if args.rnn_dim > 0:
            model.add(
                RNN(args.rnn_dim,
                    return_sequences=True,
                    dropout_W=dropout_W,
                    dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        if args.aggregation == 'mot':
            model.add(MeanOverTime(mask_zero=True))
        elif args.aggregation.startswith('att'):
            model.add(
                Attention(op=args.aggregation,
                          activation='tanh',
                          init_stdev=0.01))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            bias_value = (np.log(initial_mean_value) -
                          np.log(1 - initial_mean_value)).astype(K.floatx())
            # model.layers[-1].b.set_value(bias_value)
            model.layers[-1].bias_initializer = keras.initializers.Constant(
                value=bias_value)
        model.add(Activation('sigmoid'))
        model.emb_index = 0

    elif args.model_type == 'breg':
        logger.info('Building a BIDIRECTIONAL REGRESSION model')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        model = Sequential()
        sequence = Input(shape=(overal_maxlen, ), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim,
                           mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim,
                           return_sequences=False,
                           dropout_W=dropout_W,
                           dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim,
                            return_sequences=False,
                            dropout_W=dropout_W,
                            dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(input=sequence, output=score)
        model.emb_index = 1

    elif args.model_type == 'bregp':
        logger.info('Building a BIDIRECTIONAL REGRESSION model with POOLING')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        model = Sequential()
        sequence = Input(shape=(overal_maxlen, ), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim,
                           mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim,
                           return_sequences=True,
                           dropout_W=dropout_W,
                           dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim,
                            return_sequences=True,
                            dropout_W=dropout_W,
                            dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        forwards_mean = MeanOverTime(mask_zero=True)(forwards)
        backwards_mean = MeanOverTime(mask_zero=True)(backwards)
        merged = merge([forwards_mean, backwards_mean],
                       mode='concat',
                       concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(input=sequence, output=score)
        model.emb_index = 1

    logger.info('  Done')

    ###############################################################################################################################
    ## Initialize embeddings if requested
    #

    if args.emb_path:
        from .w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        model.layers[model.emb_index].W.set_value(
            emb_reader.get_emb_matrix_given_vocab(
                vocab, model.layers[model.emb_index].W.get_value()))
        logger.info('  Done')
    return model
Example #7
def create_model(args, initial_mean_value, vocab):

    from keras.layers.embeddings import Embedding
    from keras.models import Sequential
    from keras.layers.core import Dense, Activation
    from keras.layers import Bidirectional
    from nea.my_layers import Attention, MeanOverTime, Conv1DWithMasking

    ###############################################################################################################################
    ## Recurrence unit type
    #

    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #

    # dropout_W = 0.5        # default=0.5
    # dropout_U = 0.1        # default=0.1
    cnn_border_mode = 'same'
    if initial_mean_value.ndim == 0:
        print("Dim of initial_mean_value is 0")
        initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
    num_outputs = len(initial_mean_value)
    print("Dim of initial_mean_value is:", num_outputs)

    if args.model_type == 'cls':
        raise NotImplementedError

    logger.info('Building the model:%s' % args.model_type)
    model = Sequential()

    logger.info('    Adding the Embedding layer')
    model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
    model.emb_index = 0
    if args.emb_path:
        from nea.w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('        Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        # ipdb.set_trace()
        # model.layers[model.emb_index].W.set_value(emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].W.get_value()))
        model.layers[model.emb_index].set_weights([
            emb_reader.get_emb_matrix_given_vocab(
                vocab, model.layers[model.emb_index].get_weights()[0])
        ])
        # ipdb.set_trace()
    logger.info('    Done')

    # Add cnn layer
    if args.cnn_dim > 0:
        logger.info('    Adding the CNN layer')
        logger.info('        cnn_dim:%d' % args.cnn_dim)
        logger.info('        window_size:%d' % args.cnn_window_size)
        model.add(
            Conv1DWithMasking(nb_filter=args.cnn_dim,
                              filter_length=args.cnn_window_size,
                              border_mode=cnn_border_mode,
                              subsample_length=1))
        logger.info('    Done')

    # Add LSTM RNN layer
    logger.info('    Adding the LSTM-RNN layer')
    if 'p' in args.model_type:
        layer = RNN(args.rnn_dim, return_sequences=True
                    )  #, dropout_W=dropout_W, dropout_U=dropout_U)
    else:
        layer = RNN(args.rnn_dim, return_sequences=False)
    if 'b' in args.model_type:
        # BiLSTM
        logger.info('        Bidirectional layer created!')
        layer = Bidirectional(layer)
    model.add(layer)
    logger.info('    Done')

    # Add MOT or ATT layer
    if 'p' in args.model_type:
        if args.aggregation == 'mot':
            logger.info('    Adding the MOT layer')
            model.add(MeanOverTime(mask_zero=True))
        elif args.aggregation.startswith('att'):
            logger.info('    Adding the ATT layer')
            model.add(
                Attention(op=args.aggregation,
                          activation='tanh',
                          name='att',
                          init_stdev=0.01))

    model.add(Dense(num_outputs))
    logger.info('    Done')

    model.add(Activation('sigmoid'))
    logger.info('All done!')

    return model
Example #8
def create_model(args, initial_mean_value, overal_maxlen, vocab):

    ###############################################################################################################################
    ## Recurrence unit type
    #

    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #

    if args.dropout_w > 0:
        dropout_W = args.dropout_w
    else:
        dropout_W = args.dropout_prob  # default=0.5
    if args.dropout_u > 0:
        dropout_U = args.dropout_u
    else:
        dropout_U = args.dropout_prob  # default=0.1

    cnn_border_mode = 'same'

    if args.model_type == 'reg':
        if initial_mean_value.ndim == 0:
            initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
        num_outputs = len(initial_mean_value)
    else:
        num_outputs = initial_mean_value

    ###############################################################################################################################
    ## Initialize embeddings if requested
    #

    if args.emb_path:

        def my_init(shape, name=None):
            from nea.w2vEmbReader import W2VEmbReader as EmbReader
            logger.info('Initializing lookup table')
            emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
            emb_matrix = np.random.random(shape)
            # 			logger.info(' initial matrix \n %s ' % (emb_matrix,))
            emb_matrix = emb_reader.get_emb_matrix_given_vocab(
                vocab, emb_matrix)
            # 			from keras.backend import set_value, get_value
            # 			set_value(model.layers[model.emb_index].W, get_value(emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].W)))
            # 			model.layers[model.emb_index].W.set_value(emb_reader.get_emb_matrix_given_vocab(vocab, model.layers[model.emb_index].W.get_value()))
            # 			logger.info(' pre-trained matrix \n %s ' % (emb_matrix,))
            return K.variable(emb_matrix, name=name)

        logger.info(' Use pre-trained embedding')
    else:
        my_init = 'uniform'
        logger.info(' Use default initializing embedding')

    ###############################################################################################################################
    ## Model Stacking
    #

    if args.model_type == 'cls':
        logger.info('Building a CLASSIFICATION model with POOLING')
        dense_activation = 'tanh'
        dense_init = 'glorot_normal'
        if args.loss == 'cnp':
            final_activation = 'softmax'
            final_init = 'glorot_uniform'
        elif args.loss == 'hng':
            final_activation = 'linear'
            final_init = 'glorot_uniform'
    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model with POOLING')
        dense_activation = 'tanh'
        dense_init = 'he_normal'
        if args.normalize:
            final_activation = 'sigmoid'
            final_init = 'he_normal'
        else:
            final_activation = 'relu'
            final_init = 'he_uniform'
    else:
        raise NotImplementedError

    sequence = Input(shape=(overal_maxlen, ), dtype='int32')
    x = Embedding(len(vocab),
                  args.emb_dim,
                  mask_zero=True,
                  init=my_init,
                  trainable=args.embd_train)(sequence)

    # Conv Layer
    if args.cnn_dim > 0:
        x = Conv1DWithMasking(nb_filter=args.cnn_dim,
                              filter_length=args.cnn_window_size,
                              border_mode=cnn_border_mode,
                              subsample_length=1)(x)

    # RNN Layer
    if args.rnn_dim > 0:
        rnn_layer = RNN(args.rnn_dim,
                        return_sequences=True,
                        consume_less=args.rnn_opt,
                        dropout_W=dropout_W,
                        dropout_U=dropout_U)
        if args.bi:
            rnn_layer = Bidirectional(rnn_layer)
        x = rnn_layer(x)
        if args.dropout_prob > 0:
            x = Dropout(args.dropout_prob)(x)

        # Stack 2 Layers
        if args.rnn_2l or args.rnn_3l:
            rnn_layer2 = RNN(args.rnn_dim,
                             return_sequences=True,
                             consume_less=args.rnn_opt,
                             dropout_W=dropout_W,
                             dropout_U=dropout_U)
            if args.bi:
                rnn_layer2 = Bidirectional(rnn_layer2)
            x = rnn_layer2(x)
            if args.dropout_prob > 0:
                x = Dropout(args.dropout_prob)(x)
            # Stack 3 Layers
            if args.rnn_3l:
                rnn_layer3 = RNN(args.rnn_dim,
                                 return_sequences=True,
                                 consume_less=args.rnn_opt,
                                 dropout_W=dropout_W,
                                 dropout_U=dropout_U)
                if args.bi:
                    rnn_layer3 = Bidirectional(rnn_layer3)
                x = rnn_layer3(x)
                if args.dropout_prob > 0:
                    x = Dropout(args.dropout_prob)(x)

    # Mean over Time
    if args.aggregation == 'mot':
        x = MeanOverTime(mask_zero=True)(x)
    elif args.aggregation == 'att':
        attention_rnn = RNN(args.rnn_dim,
                            return_sequences=False,
                            consume_less=args.rnn_opt,
                            dropout_W=dropout_W,
                            dropout_U=dropout_U)
        attention_rnn = Attention(attention_rnn)
        x = attention_rnn(x)
    else:
        raise NotImplementedError

    # Augmented TF/IDF Layer
    if args.tfidf > 0:
        pca_input = Input(shape=(args.tfidf, ), dtype='float32')
        merged = merge([x, pca_input], mode='concat')
    else:
        merged = x

    # Augmented Numerical Features
    if args.features:
        ftr_input = Input(shape=(13, ), dtype='float32')
        merged = merge([merged, ftr_input], mode='concat')

    # Optional Dense Layer
    if args.dense > 0:
        if args.loss == 'hng':
            merged = DenseWithMasking(num_outputs,
                                      init=dense_init,
                                      W_regularizer=l2(0.001),
                                      activity_regularizer=l2(0.001))(merged)
        else:
            merged = DenseWithMasking(num_outputs, init=dense_init)(merged)
        if final_activation == 'relu' or final_activation == 'linear':
            merged = BatchNormalization()(merged)
        merged = Activation(dense_activation)(merged)
        if args.dropout_prob > 0:
            merged = Dropout(args.dropout_prob)(merged)

    # Final Prediction Layer
    if args.loss == 'hng':
        merged = DenseWithMasking(num_outputs,
                                  init=final_init,
                                  W_regularizer=l2(0.001),
                                  activity_regularizer=l2(0.001))(merged)
    else:
        merged = DenseWithMasking(num_outputs, init=final_init)(merged)
    if final_activation == 'relu' or final_activation == 'linear':
        merged = BatchNormalization()(merged)
    predictions = Activation(final_activation)(merged)

    # Model Input/Output
    model_input = [
        sequence,
    ]
    if args.tfidf > 0:
        model_input.append(pca_input)
    if args.features:
        model_input.append(ftr_input)

    model = Model(input=model_input, output=predictions)

    logger.info('  Model Done')
    return model
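Because model_input can contain the token sequence plus optional TF/IDF and numerical-feature inputs, training requires a matching list of arrays. A hedged usage sketch (data names, loss, and shapes are assumptions):

# Illustrative training call for the multi-input model built above.
model.compile(optimizer='rmsprop', loss='mean_squared_error')   # loss choice is an assumption
train_inputs = [train_x]                 # token ids, shape (n_samples, overal_maxlen)
if args.tfidf > 0:
    train_inputs.append(train_tfidf)     # shape (n_samples, args.tfidf)
if args.features:
    train_inputs.append(train_features)  # shape (n_samples, 13)
model.fit(train_inputs, train_y, batch_size=32)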
Example #9
def create_model(args, vocab, num_outputs, overal_maxlen, maxlen_aspect):
    
    ###############################################################################################################################
    ## Recurrence unit type
    #

    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #

    dropout = args.dropout_W       
    recurrent_dropout = args.dropout_U  
    vocab_size = len(vocab)

    logger.info('Building a LSTM attention model to predict term/aspect sentiment')
    print('\n\n')

    ##### Inputs #####
    sentence_input = Input(shape=(overal_maxlen,), dtype='int32', name='sentence_input')
    aspect_input = Input(shape=(maxlen_aspect,), dtype='int32', name='aspect_input')
    pretrain_input = Input(shape=(None,), dtype='int32', name='pretrain_input')

    ##### construct word embedding layer #####
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb')

    ### represent aspect as averaged word embedding ###
    print('use average term embs as aspect embedding')
    aspect_term_embs = word_emb(aspect_input)
    aspect_embs = Average(mask_zero=True, name='aspect_emb')(aspect_term_embs)

    ### sentence representation ###
    sentence_output = word_emb(sentence_input)
    pretrain_output = word_emb(pretrain_input)


    print('use a rnn layer')
    rnn = RNN(args.rnn_dim, return_sequences=True, dropout=dropout, recurrent_dropout=recurrent_dropout, name='lstm')
    sentence_output = rnn(sentence_output)
    pretrain_output = rnn(pretrain_output)

    print('use content attention to get term weights')
    att_weights = Attention(name='att_weights')([sentence_output, aspect_embs])
    sentence_output = WeightedSum()([sentence_output, att_weights])

    pretrain_output = Average(mask_zero=True)(pretrain_output)
  
    if args.dropout_prob > 0:
        print('use dropout layer')
        sentence_output = Dropout(args.dropout_prob)(sentence_output)
        pretrain_output = Dropout(args.dropout_prob)(pretrain_output)


    sentence_output = Dense(num_outputs, name='dense_1')(sentence_output)
    pretrain_output = Dense(num_outputs, name='dense_2')(pretrain_output)

    aspect_probs = Activation('softmax', name='aspect_model')(sentence_output)
    doc_probs = Activation('softmax', name='pretrain_model')(pretrain_output)

    model = Model(inputs=[sentence_input, aspect_input, pretrain_input], outputs=[aspect_probs, doc_probs])


    logger.info('  Done')

    ###############################################################################################################################
    ## Initialize embeddings if requested
    #

    if args.is_pretrain:

        import pickle

        print('Set embedding, lstm, and dense weights from pre-trained models')
        if args.domain == 'lt':
            f_1 = open('../pretrained_weights/lstm_weights_lt%.1f.pkl'%(args.percetage), 'rb')
            f_2 = open('../pretrained_weights/dense_weights_lt%.1f.pkl'%(args.percetage), 'rb')
        else:
            f_1 = open('../pretrained_weights/lstm_weights_res%.1f.pkl'%(args.percetage), 'rb')
            f_2 = open('../pretrained_weights/dense_weights_res%.1f.pkl'%(args.percetage), 'rb')

        lstm_weights = pickle.load(f_1)
        dense_weights = pickle.load(f_2)
      
        model.get_layer('lstm').set_weights(lstm_weights)
        model.get_layer('dense_1').set_weights(dense_weights)
        model.get_layer('dense_2').set_weights(dense_weights)


    from w2vEmbReader import W2VEmbReader as EmbReader
    logger.info('Initializing lookup table')
    emb_path = '../glove/%s.txt'%(args.domain)
    emb_reader = EmbReader(args, emb_path)
    model.get_layer('word_emb').set_weights(emb_reader.get_emb_matrix_given_vocab(vocab, model.get_layer('word_emb').get_weights()))
    logger.info('  Done')

    return model
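The returned model has two softmax heads (aspect_model and pretrain_model); Attention, WeightedSum, and Average are custom layers from the original project. It would presumably be compiled with one loss per output, for example:

# Illustrative compile step for the two-output model above (losses and weights are assumptions).
model.compile(optimizer='adam',
              loss={'aspect_model': 'categorical_crossentropy',
                    'pretrain_model': 'categorical_crossentropy'},
              loss_weights={'aspect_model': 1.0, 'pretrain_model': 0.1})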
Example #10
def create_model(args, initial_mean_value, overal_maxlen, vocab):

    import keras.backend as K
    from keras.layers.embeddings import Embedding
    from keras.models import Sequential, Model
    from keras.layers.core import Dense, Dropout, Activation
    from nea.my_layers import Attention, MeanOverTime, Conv1DWithMasking

    ###############################################################################################################################
    ## Recurrence unit type
    #

    if args.recurrent_unit == 'lstm':
        from keras.layers.recurrent import LSTM as RNN
    elif args.recurrent_unit == 'gru':
        from keras.layers.recurrent import GRU as RNN
    elif args.recurrent_unit == 'simple':
        from keras.layers.recurrent import SimpleRNN as RNN

    ###############################################################################################################################
    ## Create Model
    #

    dropout_W = 0.5  # default=0.5
    dropout_U = 0.1  # default=0.1
    cnn_border_mode = 'same'
    if initial_mean_value.ndim == 0:  #expand the dims
        initial_mean_value = np.expand_dims(initial_mean_value, axis=1)
    num_outputs = len(initial_mean_value)  # number of score outputs to predict

    if args.model_type == 'cls':
        raise NotImplementedError

    #embedding-->cnn-->rnn(return_sequence=false)-->dropout-->dense-->sigmoid
    elif args.model_type == 'reg':
        logger.info('Building a REGRESSION model')
        model = Sequential()
        # mask_zero decides whether input value 0 is treated as padding to be ignored;
        # if True, every downstream layer must support masking, otherwise an exception is raised.
        # If True, index 0 is unavailable in the vocabulary, and input_dim should be |vocabulary| + 1.
        # The Input layer is omitted here because input_length has a default value.
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:  #border_mode==padding?? subsample_length==pooling?? where is the activation??
            model.add(
                Conv1DWithMasking(nb_filter=args.cnn_dim,
                                  filter_length=args.cnn_window_size,
                                  border_mode=cnn_border_mode,
                                  subsample_length=1))
        if args.rnn_dim > 0:  # return_sequences=False returns only the last state
            model.add(
                RNN(args.rnn_dim,
                    return_sequences=False,
                    dropout_W=dropout_W,
                    dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:  # initialize the bias of the last layer
            bias_value = (np.log(initial_mean_value) -
                          np.log(1 - initial_mean_value)).astype(K.floatx())
            model.layers[-1].b.set_value(bias_value)
        model.add(Activation('sigmoid'))  # output range is (0, 1)
        # Record the index of the embedding layer for later initialization with pre-trained
        # word vectors; all of the model's layers live in model.layers.
        model.emb_index = 0

    #embedding-->cnn-->rnn(return_sequence=true)-->dropout-->MeanoverTime or Attention(mean or sum)-->Dense-->sigmoid
    elif args.model_type == 'regp':
        logger.info('Building a REGRESSION model with POOLING')
        model = Sequential()
        model.add(Embedding(args.vocab_size, args.emb_dim, mask_zero=True))
        if args.cnn_dim > 0:
            model.add(
                Conv1DWithMasking(nb_filter=args.cnn_dim,
                                  filter_length=args.cnn_window_size,
                                  border_mode=cnn_border_mode,
                                  subsample_length=1))
        if args.rnn_dim > 0:
            model.add(
                RNN(args.rnn_dim,
                    return_sequences=True,
                    dropout_W=dropout_W,
                    dropout_U=dropout_U))
        if args.dropout_prob > 0:
            model.add(Dropout(args.dropout_prob))
        if args.aggregation == 'mot':
            model.add(MeanOverTime(mask_zero=True))
        elif args.aggregation.startswith('att'):
            model.add(
                Attention(op=args.aggregation,
                          activation='tanh',
                          init_stdev=0.01))
        model.add(Dense(num_outputs))
        if not args.skip_init_bias:
            bias_value = (np.log(initial_mean_value) -
                          np.log(1 - initial_mean_value)).astype(K.floatx())
            model.layers[-1].b.set_value(bias_value)
        model.add(Activation('sigmoid'))
        model.emb_index = 0
    #embedding-->cnn-->birnn(return_sequence=false)-->dropout-->merge(concat the forRnn&backRnn)-->dense-->sigmoid
    elif args.model_type == 'breg':
        logger.info('Building a BIDIRECTIONAL REGRESSION model')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        model = Sequential()  # this line appears to be redundant
        sequence = Input(shape=(overal_maxlen, ), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim,
                           mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim,
                           return_sequences=False,
                           dropout_W=dropout_W,
                           dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim,
                            return_sequences=False,
                            dropout_W=dropout_W,
                            dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        merged = merge([forwards, backwards], mode='concat', concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(input=sequence, output=score)
        model.emb_index = 1
    #embedding-->cnn-->biRnn(return_sequence=true)-->dropout-->meanOverTime-->merge(concat)-->dense-->sigmoid
    elif args.model_type == 'bregp':
        logger.info('Building a BIDIRECTIONAL REGRESSION model with POOLING')
        from keras.layers import Dense, Dropout, Embedding, LSTM, Input, merge
        model = Sequential()  # redundant
        sequence = Input(shape=(overal_maxlen, ), dtype='int32')
        output = Embedding(args.vocab_size, args.emb_dim,
                           mask_zero=True)(sequence)
        if args.cnn_dim > 0:
            output = Conv1DWithMasking(nb_filter=args.cnn_dim,
                                       filter_length=args.cnn_window_size,
                                       border_mode=cnn_border_mode,
                                       subsample_length=1)(output)
        if args.rnn_dim > 0:
            forwards = RNN(args.rnn_dim,
                           return_sequences=True,
                           dropout_W=dropout_W,
                           dropout_U=dropout_U)(output)
            backwards = RNN(args.rnn_dim,
                            return_sequences=True,
                            dropout_W=dropout_W,
                            dropout_U=dropout_U,
                            go_backwards=True)(output)
        if args.dropout_prob > 0:
            forwards = Dropout(args.dropout_prob)(forwards)
            backwards = Dropout(args.dropout_prob)(backwards)
        forwards_mean = MeanOverTime(mask_zero=True)(forwards)
        backwards_mean = MeanOverTime(mask_zero=True)(backwards)
        merged = merge([forwards_mean, backwards_mean],
                       mode='concat',
                       concat_axis=-1)
        densed = Dense(num_outputs)(merged)
        if not args.skip_init_bias:
            raise NotImplementedError
        score = Activation('sigmoid')(densed)
        model = Model(input=sequence, output=score)
        model.emb_index = 1

    logger.info('  Done')

    ###############################################################################################################################
    ## Initialize embeddings if requested
    #

    if args.emb_path:
        from w2vEmbReader import W2VEmbReader as EmbReader
        logger.info('Initializing lookup table')
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        model.layers[model.emb_index].W.set_value(
            emb_reader.get_emb_matrix_given_vocab(
                vocab, model.layers[model.emb_index].W.get_value()))
        logger.info('  Done')

    return model