def bilstm(X_train, X_test, Y_train, Y_test, wordembeddings):
    np.random.seed(1234)
    tf.random.set_seed(1234)
    random.seed(1234)

    max_length_sentence = X_train.str.split().str.len().max()
    tokenizer = Tokenizer(filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n\'',
                          lower=True)
    tokenizer.fit_on_texts(X_train)
    word_index = tokenizer.word_index
    EMBEDDING_DIM = 300
    vocabulary_size = len(word_index) + 1
    print('Found %s unique tokens.' % len(word_index))

    sequences_train = tokenizer.texts_to_sequences(X_train)
    sequences_valid = tokenizer.texts_to_sequences(X_test)
    X_train = pad_sequences(sequences_train, maxlen=max_length_sentence)
    X_val = pad_sequences(sequences_valid, maxlen=X_train.shape[1])
    y_train = np.asarray(Y_train)
    y_val = np.asarray(Y_test)
    #print(word_index)
    '''
    print('Shape of data tensor:', X_train.shape)
    print('Shape of data tensor:', X_val.shape)
    print('Shape of data tensor:', y_train.shape)
    print('Shape of data tensor:', y_val.shape)
    
    print(X_train)
    print("*"*100)
    print(X_val)
    print("*"*100)
    print(y_train)
    print("*"*100)
    print(y_val)
    '''

    # Build the embedding matrix: look up each word, fall back to its
    # Title-case / UPPER-case variants if the stored vector is empty,
    # and finally to a random vector for out-of-vocabulary words.
    embedding_matrix = np.zeros((vocabulary_size, EMBEDDING_DIM))
    for word, i in word_index.items():
        if word in wordembeddings:
            embedding_vector = wordembeddings[word]
            if len(embedding_vector) == 0:  # stored array is empty
                embedding_vector = wordembeddings.get(word.title(), [])
                if len(embedding_vector) == 0:
                    embedding_vector = wordembeddings.get(word.upper(), [])
                    if len(embedding_vector) == 0:
                        embedding_vector = np.array([
                            round(np.random.rand(), 8)
                            for _ in range(EMBEDDING_DIM)
                        ])
        else:
            # word not in the embedding dictionary: use a random vector
            embedding_vector = np.array(
                [round(np.random.rand(), 8) for _ in range(EMBEDDING_DIM)])

        if len(embedding_vector) != 0:
            embedding_matrix[i] = embedding_vector

    # Note: this layer is built but not used below; the model constructs its
    # own Embedding with the same weights (and leaves it trainable by default).
    embedding_layer = Embedding(vocabulary_size,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                trainable=False)  # Try with trainable=True

    inputs = Input(shape=(X_train.shape[1], ))
    model = Embedding(vocabulary_size,
                      EMBEDDING_DIM,
                      input_length=max_length_sentence,
                      weights=[embedding_matrix])(inputs)

    model = Bidirectional(GRU(64))(model)  # !!!!!!! CHANGE THIS FOR OTHER MODELS
    model = Dense(900, activation='relu')(model)
    model = Dense(400, activation='relu')(model)
    model = Dense(250, activation='relu')(model)
    model = Dense(204, activation='softmax')(model)
    model = Model(inputs=inputs, outputs=model)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()

    callbacks = [EarlyStopping(monitor='val_loss')]
    hist_adam = model.fit(
        X_train,
        y_train,
        batch_size=1000,
        epochs=200,
        verbose=1,
        validation_data=(X_val, y_val),
        callbacks=callbacks
    )  #!!!!!!!!!!!!!!!!!!!!!!!CHANGE BATCH SIZE TO 1000 #change epochs to 200

    model.save(config.bigru_prepocessed_dataset1_chai
               )  # !!!!!!! CHANGE THIS FOR OTHER MODELS

    y_pred = model.predict(X_val)
    print(y_pred)

    y_val_class = pd.DataFrame(y_val).idxmax(axis=1)
    print(y_val_class)

    y_val_class_argmax = np.argmax(y_val, axis=1)
    y_pred_class_argmax = np.argmax(y_pred, axis=1)

    y_pred_class = pd.DataFrame(y_pred).idxmax(axis=1)
    print(y_pred_class)

    print(classification_report(y_val_class, y_pred_class))

    plt.suptitle('Optimizer : Adam', fontsize=10)
    plt.ylabel('Loss', fontsize=16)
    plt.xlabel('Epoch', fontsize=14)
    plt.plot(hist_adam.history['loss'], color='b', label='Training Loss')
    plt.plot(hist_adam.history['val_loss'], color='r', label='Validation Loss')
    plt.legend(loc='upper right')

    plt.savefig(
        '/home/ubuntu/asset_classification/results/bigru_model_dataset1_preprocessed_chai.png'
    )  # !!!!!!! CHANGE THIS FOR OTHER MODELS

    tf.keras.utils.plot_model(
        model, to_file=config.bigru_architecture,
        show_shapes=True)  # !!!!!!! CHANGE THIS FOR OTHER MODELS

    return (y_pred, y_val_class, y_pred_class, y_val_class_argmax,
            y_pred_class_argmax)
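A minimal driver sketch for calling bilstm(): the CSV path, the integer-coded
label column, and the load_embedding_dict helper below are assumptions for
illustration only, not part of the original example.

# Hypothetical usage sketch (assumed data layout and helper names).
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

df = pd.read_csv('assets.csv')  # assumed columns: 'text', integer-coded 'label'
X_tr, X_te, y_tr, y_te = train_test_split(
    df['text'], df['label'], test_size=0.2, random_state=1234)
y_tr = to_categorical(y_tr, num_classes=204)  # the network ends in a 204-way softmax
y_te = to_categorical(y_te, num_classes=204)
wordembeddings = load_embedding_dict('embeddings.vec')  # hypothetical: {word: 300-d np.array}
results = bilstm(X_tr, X_te, y_tr, y_te, wordembeddings)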
Example #2
encoder = Bidirectional(LSTM(HIDDEN_SIZE),
                        merge_mode='sum',
                        name="encoder_lstm")(inputs)
decoder = RepeatVector(max(sent_lens))(encoder)
decoder = Bidirectional(LSTM(EMBED_SIZE, return_sequences=True),
                        merge_mode='sum')(decoder)
autoencoder = Model(inputs, decoder)
autoencoder.compile(optimizer='sgd', loss='mse')

num_train_step = len(X_train) // BATCH_SIZE
num_test_step = len(X_test) // BATCH_SIZE

hist = autoencoder.fit_generator(train_gen,
                                 steps_per_epoch=num_train_step,
                                 epochs=20,
                                 validation_data=test_gen,
                                 validation_steps=num_test_step)

encoder = Model(autoencoder.input,
                autoencoder.get_layer("encoder_lstm").output)


def compare_cosine_similarity(x, y):
    return np.dot(x, y) / (np.linalg.norm(x, 2) * np.linalg.norm(y, 2))


for i in range(10):
    Xtest, Ytest = next(test_gen)
    Ytest_ = autoencoder.predict(Xtest)
    Xvec = encoder.predict(Xtest)
    Yvec = encoder.predict(Ytest_)
    for j in range(Xvec.shape[0]):
        print(compare_cosine_similarity(Xvec[j], Yvec[j]))
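The autoencoder above reconstructs its own input with an MSE loss, so
train_gen/test_gen must yield (X, X) pairs of embedded, fixed-length
sentences. A minimal sketch of such a generator, assuming pre-padded token
lists and a {word: vector} embedding dict (both are assumptions not shown in
the original):

import numpy as np

# Hypothetical batch generator for the sequence autoencoder above.
def sentence_gen(padded_sentences, embeddings, batch_size, embed_size):
    # padded_sentences: token lists, each already padded to the same length
    while True:
        for start in range(0, len(padded_sentences), batch_size):
            batch = padded_sentences[start:start + batch_size]
            X = np.array([[embeddings.get(w, np.zeros(embed_size)) for w in sent]
                          for sent in batch])
            yield X, X  # input and target are identical for an autoencoder

train_gen would then be something like sentence_gen(train_sents, word_vectors, BATCH_SIZE, EMBED_SIZE).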
Example #3
def train(is_debug=False):
    train_data = open("dataset/atis-2.train.w-intent.iob", "r").readlines()
    test_data = open("dataset/atis-2.dev.w-intent.iob", "r").readlines()
    train_data_ed = data_pipeline(train_data)
    test_data_ed = data_pipeline(test_data)
    word2index, index2word, slot2index, index2slot, intent2index, index2intent = \
        get_info_from_training_data(train_data_ed)
    # print("slot2index: ", slot2index)
    # print("index2slot: ", index2slot)
    index_train = to_index(train_data_ed, word2index, slot2index, intent2index)
    index_test = to_index(test_data_ed, word2index, slot2index, intent2index)

    intents = [item[3] for item in index_train]
    intent_labels = np.eye(intent_size)[np.array(intents)]
    intent_train = [item[0] for item in index_train]
    intent_train = np.array(intent_train)
    slot_train = [item[2] for item in index_train]
    slot_train = np.array(slot_train)
    slot_train_target = np.insert(slot_train,
                                  slot_train.shape[1] - 1,
                                  values=0,
                                  axis=1)
    slot_train_target = np.delete(slot_train_target, 0, axis=1)
    slot_train_target = np.eye(slot_size)[slot_train_target]
    seq = np.array(index_test[0][0])

    import tensorflow as tf
    from keras.layers import Lambda
    # tf.Print is the TF 1.x debugging op; print_func is defined for shape
    # debugging but is not used in the graph below.
    print_func = Lambda(lambda x: tf.Print(x, [tf.shape(x)]))
    squeeze = Lambda(lambda x: tf.squeeze(x, axis=2))

    # encoder define
    input_voc = Input(shape=(None, 1))
    embedding_voc = Embedding(input_dim=vocab_size,
                              output_dim=embedding_size,
                              mask_zero=True)
    embedding_voc_out = embedding_voc(input_voc)
    encoder_lstm1 = Bidirectional(LSTM(units=hidden_size,
                                       dropout=0.7,
                                       return_sequences=True),
                                  merge_mode="concat")
    embedding_voc_out = squeeze(embedding_voc_out)
    # embedding_voc_out = K.backend.squeeze(embedding_voc_out, axis=0)
    encoder_lstm1_out = encoder_lstm1(embedding_voc_out)

    # encoder
    encoder = Bidirectional(LSTM(units=hidden_size,
                                 dropout=0.7,
                                 return_sequences=False,
                                 return_state=True),
                            merge_mode="concat")

    # encoder output, states
    encoder_out, forward_h, forward_c, backward_h, backward_c = encoder(
        encoder_lstm1_out)
    encoder_state = [forward_h, forward_c, backward_h, backward_c]

    # intent
    intent = Dense(intent_size, activation="linear")(encoder_out)
    intent = Dense(intent_size, activation="softmax")(intent)

    # encode state
    forward_h = K.layers.concatenate([forward_h, backward_h], 1)
    forward_c = K.layers.concatenate([forward_c, backward_c], 1)
    encoder_state = [forward_h, forward_c]

    # decoder define
    input_slot = Input(shape=(None, 1))
    embedding_slot = Embedding(input_dim=slot_size,
                               output_dim=embedding_size,
                               mask_zero=True)
    embedding_slot_out = embedding_slot(input_slot)
    embedding_slot_out = squeeze(embedding_slot_out)
    decoder_lstm1 = LSTM(units=hidden_size * 2,
                         dropout=0.7,
                         return_state=True,
                         return_sequences=True)
    decoder_lstm1_out, forward_h, forward_c = decoder_lstm1(
        embedding_slot_out, initial_state=[forward_h, forward_c])
    decoder = LSTM(units=hidden_size * 2,
                   dropout=0.7,
                   return_sequences=True,
                   return_state=False)
    decoder_output = decoder(decoder_lstm1_out)
    dense1 = Dense(slot_size, activation="linear")
    dense1_out = dense1(decoder_output)
    dense2 = Dense(slot_size, activation="softmax")
    dense2_out = dense2(dense1_out)
    model = Model(inputs=[input_voc, input_slot], outputs=[intent, dense2_out])

    # print(model.summary())

    def intent_slot_loss(y_true, y_pred):
        y_slot_true = y_true[0]
        y_intent_true = y_true[1]
        y_slot_pred = y_pred[0]
        y_intent_pred = y_pred[1]

        return K.losses.categorical_crossentropy(
            y_slot_true, y_slot_pred) + K.losses.categorical_crossentropy(
                y_intent_true, y_intent_pred)

    # Note: intent_slot_loss above is defined but not used; the model is
    # compiled with keras.losses.categorical_crossentropy instead.
    model.compile(optimizer="adam",
                  loss=categorical_crossentropy,
                  metrics=["acc"])
    # acc = model.fit([np.expand_dims(intent_train, 2), np.expand_dims(slot_train, 2)], [intent_labels, slot_train_target], batch_size=batch_size, epochs=1)
    # print(acc)
    # model.save_weights("nlu.hdf5")
    model.load_weights("nlu.hdf5")

    ## inference
    encoder = Model(input_voc, encoder_state)
    intenter = Model(input_voc, intent)
    # print(encoder.summary())

    # decoder
    decoder_in = Input(shape=(1, ))
    decoder_state_in_h = Input(shape=(hidden_size * 2, ))
    decoder_state_in_c = Input(shape=(hidden_size * 2, ))
    decoder_state_in = [decoder_state_in_h, decoder_state_in_c]
    decoder_out, decoder_h, decoder_c = decoder_lstm1(
        embedding_slot(decoder_in), initial_state=decoder_state_in)
    decoder_state_out = [decoder_h, decoder_c]  #output
    decoder_output = decoder(decoder_out)
    dense1_out = dense1(decoder_output)
    dense2_out = dense2(dense1_out)  #output
    decoder_model = Model([decoder_in] + decoder_state_in,
                          [dense2_out] + decoder_state_out)
    print(decoder_model.summary())

    i = 0
    print(seq)
    seq = np.expand_dims(np.array(seq), 0)
    seq = np.expand_dims(np.array(seq), 2)
    encoder_state = encoder.predict(seq)
    intent = intenter.predict(seq)
    intent = np.argmax(intent)
    print(index2intent[intent])

    index = slot2index["<STR>"]
    while i < input_steps:
        i = i + 1
        decoder_out, state_h, state_c = decoder_model.predict(
            [np.array([index])] + encoder_state)
        index = np.argmax(decoder_out)
        encoder_state = [state_h, state_c]
        print(index2slot[index])
Example #4
history = model.fit(X_train,
                    np.array(y_train),
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_split=0.1,
                    verbose=2)


def pred2label(pred):
    out = []
    for pred_i in pred:
        out_i = []
        for p in pred_i:
            p_i = np.argmax(p)
            out_i.append(idx2tag[p_i].replace("PADword", "O"))
        out.append(out_i)
    return out


pred_y = model.predict(X_test)
pred_y = np.argmax(pred_y, axis=-1)
y_test_true = np.argmax(y_test, -1)

pred_y = [[index_to_tag[i].replace("PADword", "O")
           for i in pred_y[index]][0:len(y_test[index])]
          for index in range(len(pred_y))]
y_test_true = [[index_to_tag[i] for i in row] for row in y_test]

print('LSTM Classification Report\n',
      metrics.flat_classification_report(y_test_true, pred_y))  # (y_true, y_pred)
# show figure
plt.show()

score = model.evaluate(
    [X_w_te, np.array(X_c_te).reshape((len(X_c_te), max_len, max_len_char))],
    np.array(y_te),
    batch_size=batch_size,
    verbose=1)
print(model.metrics_names)
print("Score:")
print(score)

# ## Prediction on test set
from seqeval.metrics import precision_score, recall_score, f1_score, classification_report
# print("Input:")
# print(X_te[0])
# print("Supposed output:")
# print(y_te)
# print(np.array(y_te))
test_pred = model.predict(
    [X_w_te, np.array(X_c_te).reshape((len(X_c_te), max_len, max_len_char))],
    verbose=1)
# print("Prediction result:")
# print(test_pred[0])
idx2tag = {i: w for w, i in tags2idx.items()}
tags_size = len(idx2tag)
idx2tag[tags_size] = 'O'

def pred2label(pred):
    out = []
    for pred_i in pred:
        out_i = []
        for p in pred_i:
            p_i = np.argmax(p)
            out_i.append(idx2tag[p_i].replace("PAD", "O"))
        out.append(out_i)
    return out
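One way to finish the evaluation with the seqeval metrics imported earlier;
the true2label helper and the variable names mirror pred2label above and are
assumptions, not part of the original snippet.

# Hypothetical continuation: map the one-hot ground truth to tag strings and
# score the label sequences with seqeval.
def true2label(y):
    return [[idx2tag[np.argmax(t)].replace("PAD", "O") for t in row] for row in y]

pred_labels = pred2label(test_pred)
true_labels = true2label(np.array(y_te))
print("F1-score: {:.1%}".format(f1_score(true_labels, pred_labels)))
print(classification_report(true_labels, pred_labels))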
Example #6
all_train = all[:int(0.8 * all.shape[0]), ...]
Y_train = Y[:int(0.8 * all.shape[0]), ...]
Y_train_dense = np.reshape(Y_train, (Y_train.shape[0], Y_train.shape[1]))
Y_train_dense = np.argmax(Y_train_dense, axis=-1)

all_test = all[int(0.8 * all.shape[0]):, ...]
Y_test = Y[int(0.8 * all.shape[0]):, ...]
# pu.db
Y_test_dense = np.reshape(Y_test, (Y_test.shape[0], Y_test.shape[1]))
Y_test_dense = np.argmax(Y_test_dense, axis=-1)

for i in range(100):
    print(i)
    model.fit(all_train, Y_train, batch_size=1000, epochs=5, verbose=1)

    Y_pred_train = model.predict(all_train, batch_size=1000)
    Y_pred_test = model.predict(all_test, batch_size=1000)

    Y_pred_train_dense = np.reshape(
        Y_pred_train, (Y_pred_train.shape[0], Y_pred_train.shape[1]))
    Y_pred_train_dense = np.argmax(Y_pred_train_dense, axis=-1)

    Y_pred_test_dense = np.reshape(
        Y_pred_test, (Y_pred_test.shape[0], Y_pred_test.shape[1]))
    Y_pred_test_dense = np.argmax(Y_pred_test_dense, axis=-1)

    train_acc = np.sum(
        Y_pred_train_dense == Y_train_dense) * 100.0 / len(Y_pred_train_dense)
    val_acc = np.sum(
        Y_pred_test_dense == Y_test_dense) * 100.0 / len(Y_pred_test_dense)
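    # (Assumed addition) the original computes train_acc / val_acc but never
    # reports them; printing them once per outer iteration is one option.
    print("iteration %d: train_acc=%.2f%%, val_acc=%.2f%%" % (i, train_acc, val_acc))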