Example #1
import pickle
import re
import unicodedata

import tensorflow as tf


def initialize_chatbot():
    # Load the word -> index vocabulary dictionary.
    with open("model_data/vocab_dict.p", 'rb') as fp:
        vocab = pickle.load(fp)
    #print(f"Loaded {len(vocab)} words")

    #print("making sample embedding matrix...")
    sample_emb = tf.zeros((len(vocab), 100))
    """ ENCODER WORK """
    #print("Initializing Encoder...")
    encoder = Encoder(len(vocab),  # vocabulary size
                      100,         # embedding dimension
                      500,         # recurrent units
                      128,         # batch size
                      sample_emb,
                      num_layers=3,
                      drop_prob=0.1)

    #print("Testing Encoder...")
    sample_hidden = encoder.initialize_hidden_state()
    ex_input_bt = tf.zeros((128, 25))
    sample_output, sample_hidden = encoder(ex_input_bt, sample_hidden)
    assert sample_output.shape == (128, 25, 500)
    assert sample_hidden.shape == (128, 500)

    #print("Loading up encoder...")
    encoder.load_weights("model_data/encoder_gpu.h5")
    """ DECODER WORK """
    #print("Initializing Decoder...")
    decoder = Decoder(len(vocab),  # vocabulary size
                      100,         # embedding dimension
                      500,         # recurrent units
                      128,         # batch size
                      sample_emb,
                      num_layers=3,
                      drop_prob=0.1)
    #print("Testing Decoder...")
    sample_decoder_output, _, _ = decoder(tf.random.uniform((128, 1)),
                                          sample_hidden, sample_output)
    assert sample_decoder_output.shape == (128, len(vocab))

    #print("Loading up decoder...")
    decoder.load_weights("model_data/decoder_gpu.h5")

    # Inverse vocabulary: index -> word.
    inv_vocab = {v: k for k, v in vocab.items()}
    """ Some variables"""
    pad_token = 0
    sos_token = 1
    eos_token = 2
    units = 500
    maxl = 25
    """Processing functions"""

    # Convert (or remove accents) sentence to non_accents sentence
    def unicodeToAscii(s):
        return ''.join(c for c in unicodedata.normalize('NFD', s)
                       if unicodedata.category(c) != 'Mn')

    # Lowercase, trim, and remove non-letter characters,
    # e.g. "Héllo, there!" -> "hello there !".
    def normalizeString(s):
        s = unicodeToAscii(s.lower().strip())
        s = re.sub(r"([.!?])", r" \1", s)
        s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
        s = re.sub(r"\s+", r" ", s).strip()
        return s

    return (vocab, encoder, decoder, inv_vocab, pad_token, sos_token,
            eos_token, units, maxl, unicodeToAscii, normalizeString)
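
A minimal usage sketch, not part of the original: the call signatures are inferred from the smoke tests above, and greedy decoding, the unknown-word fallback, and the batch-of-1 hidden state are assumptions.

(vocab, encoder, decoder, inv_vocab, pad_token, sos_token,
 eos_token, units, maxl, unicodeToAscii, normalizeString) = initialize_chatbot()

def respond(sentence):
    # Tokenize, mapping unknown words to pad_token (assumption), and pad to maxl.
    tokens = [vocab.get(w, pad_token) for w in normalizeString(sentence).split()]
    tokens = (tokens + [pad_token] * maxl)[:maxl]
    inputs = tf.constant([tokens])

    # Assumes the encoder accepts a batch of 1 despite being built with batch 128.
    enc_out, hidden = encoder(inputs, tf.zeros((1, units)))

    # Greedy decoding: feed each prediction back in until <eos> or maxl steps.
    dec_input = tf.constant([[sos_token]])
    words = []
    for _ in range(maxl):
        # Assumes the decoder's second return value is the updated hidden state.
        logits, hidden, _ = decoder(dec_input, hidden, enc_out)
        pred = int(tf.argmax(logits[0]))
        if pred == eos_token:
            break
        words.append(inv_vocab[pred])
        dec_input = tf.constant([[pred]])
    return " ".join(words)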
Example #2
import tensorflow as tf

batchsz = 128  # batch size
# print("x_train type:", type(X_train))
# print("x_train shape:", X_train.shape)
db = tf.data.Dataset.from_tensor_slices((X_train, y_train))  # 将数据集转化成tensor
db = db.map(preprocess).shuffle(10000).batch(
    batchsz)  # 将数据集转化成batch_size的大小为128的数据

# print("db_type:", type(db))
db_test = tf.data.Dataset.from_tensor_slices((X_test, y_test))
db_test = db_test.map(preprocess).batch(batchsz)
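
preprocess is referenced above but not defined in this snippet. A plausible sketch, assuming MNIST-style uint8 images; the [0, 1] scaling and the reshape are assumptions based on the encoder's (None, 28, 28, 1) input shape below:

def preprocess(x, y):
    # Hypothetical: scale pixel values to [0, 1] floats and add a channel axis.
    x = tf.cast(x, dtype=tf.float32) / 255.0
    x = tf.reshape(x, [28, 28, 1])
    y = tf.cast(y, dtype=tf.int32)
    return x, y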

# Build the models so their variables exist, then restore the checkpoints.
encoder = Encoder()
encoder.build(input_shape=(None, 28, 28, 1))
decoder = Decoder()
decoder.build(input_shape=(None, 64, 64, 3))

encoder.load_weights("./encoder_weights.ckpt")
decoder.load_weights('./decoder_weights.ckpt')

# Image display helper:
# def generate_plot_image(gen_model, test_noise):
#     pre_images = gen_model(test_noise, training=False)
#     fig = plt.figure(figsize=(4, 4))
#     for i in range(pre_images.shape[0]):
#         plt.subplot(4, 4, i+1)
#         plt.imshow((pre_images[i, :, :, 0] + 1)/2, cmap='gray')
#         plt.axis('off')
#     plt.show()

# Run the autoencoder forward pass over the training batches.
for step, (x, y) in enumerate(db):
    en_res = encoder(x)
    de_res = decoder(en_res)
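
A hypothetical follow-up, not in the original, reusing the idea from the commented-out plotting helper to inspect a few reconstructions; the [-1, 1] output range and the 4x4 grid are assumptions:

import matplotlib.pyplot as plt

for x, _ in db_test.take(1):
    de_res = decoder(encoder(x))
    plt.figure(figsize=(4, 4))
    for i in range(16):
        plt.subplot(4, 4, i + 1)
        # Rescale from [-1, 1] to [0, 1] for display (assumption).
        plt.imshow((de_res[i].numpy() + 1) / 2)
        plt.axis('off')
    plt.show()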