def encode(texts, USE_QA_model, ConvRT_model, contexts=None):
    """Encode candidate responses with USE-QA and ConvRT, concatenated.

    Args:
        texts: iterable of raw response strings to encode.
        USE_QA_model: loaded TF-Hub USE-QA module; its
            ``response_encoder`` signature is invoked with both the
            responses and their contexts.
        ConvRT_model: ConvRT wrapper exposing ``encode_responses``.
        contexts: optional iterable of raw context strings, one per
            response. When omitted, empty-string contexts are used
            (the USE-QA response encoder accepts empty contexts).

    Returns:
        np.ndarray of shape (len(texts), use_dim + convrt_dim): the
        USE-QA response embedding concatenated with the ConvRT response
        encoding along the last axis.
    """
    texts = [utils.simple_preprocess(text) for text in texts]
    # FIX: `contexts` defaults to None but was iterated unconditionally,
    # raising TypeError for every caller that omitted it. Fall back to
    # empty contexts, matching the texts in length.
    if contexts is None:
        contexts = [""] * len(texts)
    else:
        contexts = [utils.simple_preprocess(text) for text in contexts]
    ConvRT_encodings = ConvRT_model.encode_responses(texts)
    USE_embeddings = USE_QA_model.signatures['response_encoder'](
        input=tf.constant(texts),
        context=tf.constant(contexts))["outputs"]
    return np.concatenate(
        [np.asarray(USE_embeddings), np.asarray(ConvRT_encodings)], axis=-1)
def encode(texts, USE_QA_model, ConvRT_model, contexts=None):
    """Encode queries with the USE-QA question encoder plus ConvRT context
    encodings, concatenated along the last axis.

    NOTE(review): this redefines ``encode`` (a response-encoder variant
    appears earlier) — if both live in this module the earlier one is
    shadowed; confirm this is intentional.

    Args:
        texts: iterable of raw query strings.
        USE_QA_model: loaded TF-Hub USE-QA module; its
            ``question_encoder`` signature is invoked on the queries.
        ConvRT_model: ConvRT wrapper exposing ``encode_contexts``.
        contexts: optional iterable where each element is itself a list
            of prior utterances (conversation history) for the matching
            query; passed through as ``extra_contexts``.

    Returns:
        np.ndarray of shape (len(texts), use_dim + convrt_dim): the
        USE-QA question embedding concatenated with the ConvRT context
        encoding.
    """
    texts = [utils.simple_preprocess(text) for text in texts]
    if contexts is not None:
        # Each element of `contexts` is a list of utterances; preprocess
        # every utterance. (Nested comprehension replaces the original
        # manual append loop — same result, idiomatic form.)
        contexts = [[utils.simple_preprocess(text) for text in context]
                    for context in contexts]
    context_encodings = ConvRT_model.encode_contexts(
        texts, extra_contexts=contexts)
    question_embeddings = USE_QA_model.signatures['question_encoder'](
        tf.constant(texts))["outputs"]
    return np.concatenate(
        [np.asarray(question_embeddings), np.asarray(context_encodings)],
        axis=-1)
def para_encode(text):
    """Return a sqrt-normalized bag-of-words ParaNMT embedding for `text`.

    Sums the ParaNMT vectors of the in-vocabulary tokens and divides by
    sqrt(token count), which keeps the magnitude comparable across
    sentences of different lengths. Returns a zero vector of the
    embedding dimensionality when no token is in the vocabulary.

    Args:
        text: raw input string.

    Returns:
        np.ndarray (1-D float32) of the ParaNMT embedding dimension.
    """
    global ParaNMT_embd
    # NOTE(review): assumes utils.simple_preprocess returns a
    # (clean_text, tokens) pair in this call form — TODO confirm.
    text, tokenized_text = utils.simple_preprocess(text)
    paraembds = [ParaNMT_embd[token] for token in tokenized_text
                 if token in ParaNMT_embd]
    if paraembds:
        return np.sum(np.asarray(paraembds), axis=0) / math.sqrt(
            len(paraembds))
    # FIX: the original probed the dimensionality with a for/break loop
    # over the dict, which raised NameError (`paradim` unbound) if the
    # vocabulary was empty, and it allocated this zero vector even when
    # it was immediately overwritten. Compute the dimension only on the
    # out-of-vocabulary path, with an explicit empty-vocabulary guard.
    paradim = (next(iter(ParaNMT_embd.values())).shape[0]
               if ParaNMT_embd else 0)
    return np.zeros((paradim,), np.float32)
model_reverse, utterance, query_encoding, candidates, response_context, conversation_history, bias=biases) print("\n") if len(str(response).split(" ")) <= 100: if flags.voice: entry = utils.simple_preprocess(str(response).lower(), for_speech=True, return_tokenized=True) entry = " ".join(entry) wavefiles = text2speech.process(entry) def f1(): utils.delay_print("Bot: " + response) def f2(): text2speech.play(wavefiles) p1 = Process(target=f1) p2 = Process(target=f2) p1.start() p2.start()