def BertCrfModel(transformer_model, max_sentence_length, label_nums):
    input_ids = tf.keras.Input(
        name="input_ids", shape=(max_sentence_length,), dtype=tf.int32
    )
    attention_mask = tf.keras.Input(
        name="attention_mask", shape=(max_sentence_length,), dtype=tf.int32
    )
    transformer = transformer_model([input_ids, attention_mask])
    # Assumes the transformer exposes its per-layer hidden-states tuple at
    # index 1 of the output (e.g. output_hidden_states=True, no pooling layer).
    hidden_states = transformer[1]

    # Select the last `hidden_states_size` layers and concatenate them.
    hidden_states_size = 1
    hidden_states_ind = list(range(-hidden_states_size, 0, 1))

    selected_hidden_states = tf.keras.layers.concatenate(
        [hidden_states[i] for i in hidden_states_ind]
    )
    crf = CRF(dtype="float32")
    output = tf.keras.layers.Dense(label_nums, activation="relu")(
        selected_hidden_states
    )
    output = crf(output)

    model = tf.keras.models.Model(
        inputs=[input_ids, attention_mask], outputs=output
    )
    # ModelWithCRFLoss wraps the model so the CRF negative log-likelihood
    # is applied automatically at compile time.
    model = ModelWithCRFLoss(model)
    return model
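A minimal usage sketch for the factory above, assuming Hugging Face transformers and a tf2crf-style CRF/ModelWithCRFLoss. The checkpoint name and sizes are illustrative; note the function reads the hidden-states tuple from index 1 of the transformer output, which matches the original author's configuration, whereas a stock TFBertModel with its pooler present puts that tuple at index 2.

import tensorflow as tf
from transformers import TFBertModel

# Illustrative checkpoint; output_hidden_states makes the per-layer states available.
bert = TFBertModel.from_pretrained("bert-base-uncased",
                                   output_hidden_states=True,
                                   return_dict=False)
model = BertCrfModel(bert, max_sentence_length=128, label_nums=10)
model.compile(optimizer=tf.keras.optimizers.Adam(3e-5))  # loss comes from ModelWithCRFLoss
# model.fit([input_ids_array, attention_mask_array], label_ids, batch_size=16, epochs=3)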
Example #2
    def build(self, hp):
        # Model definition
        inpt = Input(shape=(MAX_LEN, ))  # MAX_LEN, VECT_SIZE
        # input_dim: Size of the vocabulary, i.e. maximum integer index + 1
        # output_dim: Dimension of the dense embedding
        # input_shape: 2D tensor with shape (batch_size, input_length)

        # doc_vocab: vocabulary - number of words - of the train dataset
        model = Embedding(
            doc_vocab,
            output_dim=100,
            input_length=MAX_LEN,  # n_words + 2 (PAD & UNK)
            weights=[embedding_matrix],  # use GloVe vectors as initial weights
            mask_zero=True,
            trainable=True,
            activity_regularizer=l1(0.0000001))(inpt)  # name='word_embedding'

        # hp.Choice('activity_regularizer_1', values=[0.0, 0.00001, 0.000001, 0.0000001])

        # , activity_regularizer=l1(0.0000001)   hp.Choice('activity_regularizer_2', values=[0.0, 0.0000001, 0.00000001, 0.000000001])

        # recurrent_dropout=0.1: 10% probability of dropping the recurrent
        # connections that simulate the LSTM memory cells
        # units = 100 / 0.55 ≈ 182 neurons (to compensate for the 0.55 dropout)
        model = Bidirectional(
            LSTM(units=100,
                 return_sequences=True,
                 activity_regularizer=l1(0.000000001),
                 recurrent_constraint=max_norm(2)))(
                     model)  # input_shape=(1, MAX_LEN, VECT_SIZE)
        model = Dropout(hp.Choice('dropout', values=[0.0, 0.3, 0.5]))(model)
        # model = TimeDistributed(Dense(number_labels, activation="relu"))(model)  # a dense layer as suggested by neuralNer
        model = Dense(number_labels, activation=None)(
            model)  # activation=None is equivalent to activation='linear'
        crf = CRF(
        )  # CRF layer (TODO: should this be number_labels + 1, +1 for PAD?)
        out = crf(model)  # output
        model = Model(inputs=inpt, outputs=out)

        # set learning rate
        # lr_rate = InverseTimeDecay(initial_learning_rate=0.05, decay_rate=4, decay_steps=steps_per_epoch)
        # lr_rate = ExponentialDecay(initial_learning_rate=0.01, decay_rate=0.5, decay_steps=10000)

        # set optimizer
        # decay=learning_rate / epochs
        # CASE 1: decay=0.01
        # CASE 2: decay=0.1/5
        # learning_rate=0.0 is a placeholder; the tuned rate (self.initial_lrate
        # below) is presumably applied later, e.g. via a LearningRateScheduler
        opt = SGD(
            learning_rate=0.0, momentum=0.9, clipvalue=5.0
        )  # clipvalue (gradient clipping): clip each gradient to [-5, 5]
        # opt = SGD(learning_rate=0.01, decay=0.01/steps_per_epoch, momentum=0.9, clipvalue=10.0)  # clipvalue (Gradient Clipping): clip the gradient to [-5 to 5]
        # opt = SGD(learning_rate=lr_rate, clipvalue=3.0, clipnorm=2.0, momentum=0.9)  # clipvalue (Gradient Clipping): clip the gradient to [-5 to 5]

        # compile Bi-LSTM-CRF
        model.compile(optimizer=opt, loss=crf.loss,
                      metrics=[crf.accuracy])  # , f1score()
        # model.compile(optimizer=opt, loss=crf.loss, metrics=[crf.viterbi_accuracy])

        self.initial_lrate = hp.Choice('learning_rate', [0.05, 0.01])

        return model
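The build method above is written for Keras Tuner; a short driver sketch, where BiLstmCrfHyperModel is a hypothetical kt.HyperModel subclass containing this build, and X_train/y_train are assumed to exist:

import keras_tuner as kt

tuner = kt.RandomSearch(BiLstmCrfHyperModel(),
                        objective='val_loss',
                        max_trials=8,
                        overwrite=True)
tuner.search(X_train, y_train, epochs=5, validation_split=0.1)
best_model = tuner.get_best_models(num_models=1)[0]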
Example #3
def main():
    X, y, words, tags = load_dataset(DATA_PATH)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)

    inp = Input(shape=(MAX_LEN, ))
    model = Embedding(input_dim=len(words) + 2,
                      output_dim=EMBEDDING_SIZE,
                      input_length=MAX_LEN,
                      mask_zero=True)(inp)
    model = Bidirectional(
        LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))(model)
    model = TimeDistributed(Dense(50, activation="relu"))(model)
    crf = CRF(len(tags) + 1)  # CRF layer
    out = crf(model)  # output

    model = Model(inp, out)
    model.compile(optimizer="rmsprop", loss=crf.loss, metrics=[crf.accuracy])

    model.summary()

    checkpointer = ModelCheckpoint(filepath='model.h5',
                                   verbose=0,
                                   mode='auto',
                                   save_best_only=True,
                                   monitor='val_loss')

    history = model.fit(X_train,
                        np.array(y_train),
                        batch_size=BATCH_SIZE,
                        epochs=EPOCHS,
                        validation_split=0.1,
                        callbacks=[checkpointer])
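A hedged sketch of the preprocessing load_dataset presumably performs; sentences, tag_seqs, word2idx and tag2idx are hypothetical names, and the PAD=0 / one-hot target layout only mirrors the n_words + 2 and len(tags) + 1 sizing above:

import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

# word/tag sequences -> fixed-length id matrices
X = pad_sequences([[word2idx[w] for w in s] for s in sentences],
                  maxlen=MAX_LEN, padding="post", value=0)  # 0 = PAD
y_ids = pad_sequences([[tag2idx[t] for t in ts] for ts in tag_seqs],
                      maxlen=MAX_LEN, padding="post", value=0)
y = np.array([to_categorical(seq, num_classes=len(tags) + 1) for seq in y_ids])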
Example #4
    def build_crf(self):
        reg = tf.keras.regularizers.L2(1e-3)
        input = Input(shape=(self.seq_len, self.feat_dim), dtype='float32')
        mid = Dense(self.num_classes,
                    input_shape=(self.seq_len, self.feat_dim),
                    activation='linear',
                    kernel_regularizer=reg)(input)
        crf = CRF(dtype='float32', sparse_target=True)
        crf.sequence_lengths = self.seq_len
        crf.output_dim = self.num_classes
        output = crf(mid)
        model = Model(input, output)

        opt = tf.keras.optimizers.Adam(learning_rate=0.01)
        model.compile(loss=crf.loss, optimizer=opt, metrics=[crf.accuracy])

        model.load_weights(self.crf_weights)
        self.crf = model
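An illustrative inference call for the tagger loaded above, written as if inside the same class; features is a hypothetical (batch, seq_len, feat_dim) float32 array, and the argmax decode assumes the CRF outputs per-class scores rather than already-decoded tag ids:

import numpy as np

scores = self.crf.predict(features)   # (batch, seq_len, num_classes)
tags = np.argmax(scores, axis=-1)     # (batch, seq_len) predicted class ids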
Example #5
    def get_compiled_model(self, vectorizer_model_name, missing_values_handled,
                           max_sentence_length, max_word_length, n_words,
                           n_chars, n_tags, word2idx):

        vectorizer_model_settings = models[vectorizer_model_name]
        vectorizer_model_size = vectorizer_model_settings['vector_size']

        word_in = Input(shape=(max_sentence_length, ))
        if not vectorizer_model_settings['precomputed_vectors']:
            emb_word = Embedding(input_dim=n_words + 2,
                                 output_dim=vectorizer_model_size,
                                 input_length=max_sentence_length,
                                 mask_zero=True)(word_in)
        else:
            embedding_weights = get_embedding_weights(vectorizer_model_name,
                                                      vectorizer_model_size,
                                                      missing_values_handled,
                                                      word2idx)
            emb_word = Embedding(input_dim=n_words + 2,
                                 output_dim=vectorizer_model_size,
                                 input_length=max_sentence_length,
                                 mask_zero=True,
                                 weights=[embedding_weights],
                                 trainable=False)(word_in)

        # input and embeddings for characters
        char_in = Input(shape=(
            max_sentence_length,
            max_word_length,
        ))
        emb_char = TimeDistributed(
            Embedding(input_dim=n_chars + 2, output_dim=10,
                      mask_zero=True))(char_in)
        # character LSTM to get word encodings by characters
        char_enc = TimeDistributed(
            LSTM(units=20, return_sequences=False,
                 recurrent_dropout=0.5))(emb_char)

        # main LSTM
        x = concatenate([emb_word, char_enc])
        # x = SpatialDropout1D(0.3)(x)

        model = Bidirectional(
            LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))(x)
        model = TimeDistributed(Dense(50, activation='relu'))(model)

        crf = CRF(n_tags + 1)
        out = crf(model)

        model = Model([word_in, char_in], out)
        model.summary()
        model.compile(optimizer="rmsprop",
                      loss=crf.loss,
                      metrics=[crf.accuracy])
        return model
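A smoke-test sketch with random data to show the two input shapes the model expects; the batch size and the one-hot target layout are assumptions (a CRF built with sparse_target=True would take integer tag ids instead):

import numpy as np

X_word = np.random.randint(1, n_words, size=(8, max_sentence_length))
X_char = np.random.randint(1, n_chars, size=(8, max_sentence_length, max_word_length))
# one-hot per-token targets over n_tags + 1 classes (incl. PAD)
y = np.eye(n_tags + 1)[np.random.randint(0, n_tags + 1, size=(8, max_sentence_length))]
model.fit([X_word, X_char], y, batch_size=4, epochs=1)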
Example #6
def test_model():
    inputs = Input(shape=(None, ), dtype='int32')
    output = Embedding(100, 40, trainable=True, mask_zero=True)(inputs)
    output = Bidirectional(GRU(64, return_sequences=True))(output)
    output = Dense(9, activation=None)(output)
    crf = CRF(dtype='float32', name='crf')
    output = crf(output)
    base_model = Model(inputs, output)
    model = ModelWithCRFLoss(base_model)
    model.compile(optimizer='adam')
    return model
Example #7
    def __init__(self, config, *inputs, **kwargs):
        super().__init__(config, *inputs, **kwargs)

        self.num_labels = config.num_labels
        self.config = config
        self.bert = TFBertMainLayer(self.config, name="bert")
        self.bilstm = Bidirectional(
            LSTM(units=50, return_sequences=True, recurrent_dropout=0.1))
        self.dropout = Dropout(0.2)
        self.time_distributed = TimeDistributed(
            Dense(self.num_labels, activation="relu"))
        self.crf = CRF(self.num_labels + 1)
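The snippet defines the layers but not the forward pass; a hedged sketch of the call() these layers imply (not part of the source):

    def call(self, inputs, training=False):
        outputs = self.bert(inputs)            # [0] = per-token hidden states
        x = self.bilstm(outputs[0])
        x = self.dropout(x, training=training)
        x = self.time_distributed(x)
        return self.crf(x)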
Example #8
    def __call__(self):
        self.reg = tf.keras.regularizers.L2(self.L2)
        input = Input(shape=(self.seq_dim, self.feat_dim), dtype='float32')
        mid = Dense(self.num_classes,
                    input_shape=(self.seq_dim, self.feat_dim),
                    activation='softmax',
                    kernel_regularizer=self.reg)(input)
        self.crf = CRF(dtype='float32', sparse_target=True)
        self.crf.sequence_lengths = self.seq_dim
        self.crf.output_dim = self.num_classes
        output = self.crf(mid)
        model = Model(input, output)

        self.model = model

        return model
Example #9
def test():
    inputs = Input(shape=(None, ), dtype='int32')
    output = Embedding(100, 40, trainable=True, mask_zero=True)(inputs)
    output = Bidirectional(GRU(64, return_sequences=True))(output)
    output = Dense(9, activation=None)(output)
    crf = CRF(dtype='float32')
    output = crf(output)
    model = Model(inputs, output)
    model.compile(loss=crf.loss, optimizer='adam', metrics=[crf.accuracy])

    # wrap in np.array so Keras doesn't misread a bare list as multiple inputs
    x = np.array([[5, 2, 3] * 3] * 10)
    y = np.array([[1, 2, 3] * 3] * 10)

    model.fit(x=x, y=y, epochs=2, batch_size=2)
    model.save('model')
Example #10
def test():
    inputs = Input(shape=(None, ), dtype='int32')
    output = Embedding(100, 40, trainable=True, mask_zero=True)(inputs)
    output = Bidirectional(GRU(64, return_sequences=True))(output)
    output = Dense(9, activation=None)(output)
    crf = CRF(dtype='float32')
    output = crf(output)
    base_model = Model(inputs, output)
    model = ModelWithCRFLoss(base_model)
    model.compile(optimizer='adam')

    x = np.array([[5, 2, 3] * 3] * 100)
    y = np.array([[1, 2, 3] * 3] * 100)

    model.fit(x=x, y=y, epochs=10, batch_size=4, validation_split=0.1)
    model.save('model')
Example #11
def _bi_lstm_crf_model(n_words: int, n_tags: int, max_len: int):
    """Model"""
    input_ = tf.keras.layers.Input(shape=(max_len, ), name='input_layer')
    embedding_layer = tf.keras.layers.Embedding(input_dim=n_words + 2,
                                                output_dim=50,
                                                mask_zero=True,
                                                name='embedding_layer')(input_)
    lstm_layer = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(units=128,
                             return_sequences=True,
                             activation="relu",
                             recurrent_dropout=0.4))(embedding_layer)
    tensor = tf.keras.layers.Dropout(0.4)(lstm_layer)
    tensor = tf.keras.layers.Dense(n_tags)(tensor)
    crf = CRF(n_tags)
    output = crf(tensor)
    model = tf.keras.models.Model(input_, output)

    return model
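The factory returns an uncompiled model, and how the CRF loss is attached depends on the CRF implementation; a sketch assuming a keras_contrib-style layer that exposes loss and accuracy attributes, as the other examples here do:

model = _bi_lstm_crf_model(n_words=10000, n_tags=9, max_len=75)
crf_layer = model.layers[-1]  # the CRF layer added last
model.compile(optimizer='adam', loss=crf_layer.loss, metrics=[crf_layer.accuracy])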
Example #12
    def __init__(self,
                 vocab_size=10,
                 num_states=4,
                 embedding_dim=16,
                 rnn_units=8):
        super(BiRNNCRF, self).__init__()

        # 0 for `pad`, 1 for `unk`
        self.embedding_layer = Embedding(input_dim=vocab_size + 2,
                                         output_dim=embedding_dim)

        # merge_mode: sum, mul, concat, ave. Default is `concat`.
        self.bi_rnn_layer = Bidirectional(GRU(units=rnn_units,
                                              return_sequences=True),
                                          merge_mode="ave")
        # 5 output units = 4 tags (B, M, E, S) + 1 for pad; note the hard-coded
        # 5 ignores the num_states argument
        self.dense_layer = Dense(units=5, activation='softmax')

        self.crf_layer = CRF()
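Again only the layers are shown; a hedged sketch of the forward pass they imply (not in the source):

    def call(self, inputs):
        x = self.embedding_layer(inputs)  # (batch, time) -> (batch, time, emb)
        x = self.bi_rnn_layer(x)          # averaged forward/backward GRU states
        x = self.dense_layer(x)           # per-token tag distribution
        return self.crf_layer(x)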
Example #13
    def get_compiled_model(self, vectorizer_model_name, missing_values_handled,
                           max_sentence_length, max_word_length, n_words,
                           n_tags, word2idx):

        vectorizer_model_settings = models[vectorizer_model_name]
        vectorizer_model_size = vectorizer_model_settings['vector_size']

        word_in = Input(shape=(max_sentence_length, ))
        if not vectorizer_model_settings['precomputed_vectors']:
            emb_word = Embedding(input_dim=n_words + 2,
                                 output_dim=vectorizer_model_size,
                                 input_length=max_sentence_length,
                                 mask_zero=True)(word_in)
        else:
            embedding_weights = get_embedding_weights(vectorizer_model_name,
                                                      vectorizer_model_size,
                                                      missing_values_handled,
                                                      word2idx)
            emb_word = Embedding(input_dim=n_words + 2,
                                 output_dim=vectorizer_model_size,
                                 input_length=max_sentence_length,
                                 mask_zero=True,
                                 weights=[embedding_weights],
                                 trainable=False)(word_in)

        model = Bidirectional(
            LSTM(units=50, return_sequences=True,
                 recurrent_dropout=0.1))(emb_word)
        model = TimeDistributed(Dense(50, activation='relu'))(model)
        # print(dir(CRF))
        crf = CRF(n_tags + 1)
        out = crf(model)

        model = Model(word_in, out)
        model.summary()
        model.compile(optimizer="rmsprop",
                      loss=crf.loss,
                      metrics=[crf.accuracy])
        return model
Example #14
with open('w2i.json', encoding="utf8") as json_file:
    word2idx = json.load(json_file)
with open('i2w.json', encoding="utf8") as json_file:
    idx2word = json.load(json_file)

max_len = 223

model = Sequential()
model.add(Embedding(input_dim=len(word2idx), output_dim=40,
                    input_length=max_len, mask_zero=False))
model.add(Bidirectional(LSTM(units=64, return_sequences=True,
                             recurrent_dropout=0.2, dropout=0.2)))
model.add(TimeDistributed(Dense(64, activation="relu")))
model.add(Dropout(0.2))
model.add(TimeDistributed(Dense(64, activation="relu")))
model.add(Dropout(0.2))
crf = CRF(4)  # CRF layer
model.add(crf)
model.load_weights('model.h5')

name = []
for s in sentences:
    s_ids = []
    for w in s:
        if w in word2idx:
            s_ids.append(word2idx[w])
        else:
            s_ids.append(word2idx['UNK'])
    X = pad_sequences(maxlen=max_len, sequences=[s_ids], padding="post",
                      value=word2idx["PAD"])
    predict = model.predict(X)
    tags = np.argmax(predict[0], axis=-1)  # decode per-token tag ids
    for i in range(min(len(s), max_len)):
        if tags[i] == 1:  # assumed: tag id 1 marks the entity of interest
            name.append(s[i])
Example #15

inpt = Input(shape=(MAX_LEN, ))
model = Embedding(
    doc_vocab,
    output_dim=100,
    input_length=MAX_LEN,
    weights=[embedding_matrix],  # use GloVe vectors as initial weights
    mask_zero=True,
    trainable=True,
    activity_regularizer=l1(0.0000001))(inpt)  # name='word_embedding'

# recurrent_dropout=0.1: 10% probability of dropping the recurrent
# connections that simulate the LSTM memory cells
# units = 100 / 0.55 ≈ 182 neurons (to compensate for the 0.55 dropout)
model = Bidirectional(
    LSTM(units=100,
         return_sequences=True,
         activity_regularizer=l1(0.0000000001),
         recurrent_constraint=max_norm(2)))(
             model)  # input_shape=(1, MAX_LEN, VECT_SIZE)
# model = Dropout(0.3)(model)  # 0.5
# model = TimeDistributed(Dense(number_labels, activation="relu"))(model)  # a dense layer as suggested by neuralNer
model = Dense(number_labels, activation=None)(
    model)  # activation=None is equivalent to activation='linear'
crf = CRF()  # CRF layer (TODO: should this be number_labels + 1, +1 for PAD?)
out = crf(model)  # output
model = Model(inputs=inpt, outputs=out)

# set optimizer
# decay=learning_rate / epochs
opt = SGD(learning_rate=0.0, momentum=0.9, clipvalue=5.0
          )  # clipvalue (Gradient Clipping): clip the gradient to [-5 to 5]
#opt = SGD(learning_rate=0.05, decay=0.01, momentum=0.9, clipvalue=5.0)  # clipvalue (Gradient Clipping): clip the gradient to [-5 to 5]

# compile Bi-LSTM-CRF
model.compile(optimizer=opt, loss=crf.loss, metrics=[crf.accuracy])
# model.compile(optimizer=opt, loss=crf.loss, metrics=[crf.viterbi_accuracy])

print('BEFORE TRAINING', model.get_weights())
Example #16
"""# CRF

## Model
"""

inputs = Input((MAX_SEQ_LENGTH, ))
output = Embedding(len(train_val_vocab),
                   embedding_dimension,
                   embeddings_initializer=tf.keras.initializers.Constant(
                       embedding_matrix_train_validate),
                   trainable=False,
                   mask_zero=True)(inputs)
output = Bidirectional(LSTM(crf_rnn_units, return_sequences=True))(output)
output = TimeDistributed(Dense(len(pos_tags)))(output)

crf = CRF(dtype='float32')
output = crf(output)
base_model = Model(inputs, output)
model_CRF = ModelWithCRFLoss(base_model)
model_CRF.compile(optimizer='adam')
model_CRF.build((None, MAX_SEQ_LENGTH))
model_CRF.summary()
"""## Fit"""

Y_train_crf = flatten_y(Y_train)
Y_validate_crf = flatten_y(Y_validate)
model_CRF.fit(X_train,
              Y_train_crf,
              validation_data=(X_validate, Y_validate_crf),
              batch_size=crf_batch_size,
              epochs=gru_epochs)
Example #17
st.set_option('deprecation.showPyplotGlobalUse', False)

# Setting global network params
SEQ_LEN = 1024
NUM_CLASSES = 25
FEAT_DIM = 128

# Since tf2crf is a custom class, we need to rebuild the CRF model and load the
# weights (the saved model cannot be loaded directly)
reg = tf.keras.regularizers.L2(1e-3)
input = Input(shape=(SEQ_LEN, FEAT_DIM), dtype='float32')
mid = Dense(NUM_CLASSES,
            input_shape=(SEQ_LEN, FEAT_DIM),
            activation='linear',
            kernel_regularizer=reg)(input)
crf = CRF(dtype='float32', sparse_target=True)
crf.sequence_lengths = SEQ_LEN
crf.output_dim = NUM_CLASSES
output = crf(mid)
model = Model(input, output)

opt = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(loss=crf.loss, optimizer=opt, metrics=[crf.accuracy])

load_from = './model_01'

model.load_weights(load_from)

crf = model
# Loading FCNN feature extractor
cnn = load_model('cnn_extractor.h5')
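A hedged end-to-end inference sketch chaining the two models; batch is a hypothetical array matching the CNN extractor's input shape:

import numpy as np

feats = cnn.predict(batch)           # -> (batch, SEQ_LEN, FEAT_DIM)
scores = crf.predict(feats)          # per-frame class scores
labels = np.argmax(scores, axis=-1)  # (batch, SEQ_LEN) predicted class ids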
Example #18
    def __init__(self):
        super().__init__(name='intent')
        self.bert = TFBertModel.from_pretrained('bert-base-uncased')
        self.dropout = Dropout(0.1)
        self.dense = Dense(9, activation='relu')
        self.crf = CRF(9)
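As in the earlier subclassed models, only __init__ is shown; a hedged sketch of the call() these layers suggest (not part of the source):

    def call(self, inputs, training=False):
        outputs = self.bert(inputs)                      # [0] = last hidden state
        x = self.dropout(outputs[0], training=training)
        x = self.dense(x)
        return self.crf(x)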