Exemplo n.º 1
0
import argparse
import os
import sys

import numpy as np

# Append the parent of this file's directory to the import search path; presumably this is the
# repository root, so that the `cakechat` package resolves when the script is run directly.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from cakechat.utils.env import init_theano_env

# Called before the remaining cakechat imports below -- presumably the Theano environment has to
# be configured before those modules are loaded (TODO confirm against init_theano_env).
init_theano_env()

from cakechat.config import QUESTIONS_CORPUS_NAME, INPUT_SEQUENCE_LENGTH, INPUT_CONTEXT_SIZE, \
    PREDICTION_MODES, PREDICTION_MODE_FOR_TESTS, DEFAULT_CONDITION, RANDOM_SEED
from cakechat.utils.text_processing import get_tokens_sequence, replace_out_of_voc_tokens
from cakechat.utils.dataset_loader import get_tokenized_test_lines
from cakechat.dialog_model.model_utils import transform_context_token_ids_to_sentences, \
    transform_contexts_to_token_ids, lines_to_context
from cakechat.dialog_model.inference import get_nn_responses
from cakechat.dialog_model.factory import get_trained_model

# Seed NumPy's global random generator with the configured RANDOM_SEED
# (presumably to make runs reproducible).
np.random.seed(seed=RANDOM_SEED)


def load_corpus(nn_model, corpus_name):
    """
    Load the tokenized test lines of the given corpus.

    The values of ``nn_model.index_to_token`` are collected into a set and handed to
    ``get_tokenized_test_lines`` together with ``corpus_name``; the loader's result is
    returned unchanged.
    """
    model_tokens = set(nn_model.index_to_token.values())
    return get_tokenized_test_lines(corpus_name, model_tokens)


def process_text(nn_model, text):
    tokenized_line = get_tokens_sequence(text)
Exemplo n.º 2
0
import os
import sys
import unittest
import numpy as np

# Append the directory four levels above this file's directory to the import search path;
# presumably this is the repository root, so that the `cakechat` package resolves when the
# test module is run directly.
sys.path.append(
    os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))))

from cakechat.utils.env import init_theano_env

# Called before the remaining cakechat imports below -- presumably the Theano environment has to
# be configured before those modules are loaded (TODO confirm against init_theano_env).
init_theano_env()

from cakechat.dialog_model.factory import get_trained_model
from cakechat.dialog_model.inference import get_sequence_log_probs
from cakechat.dialog_model.inference.utils import get_next_token_log_prob_one_step
from cakechat.config import DECODER_DEPTH, HIDDEN_LAYER_DIMENSION, RANDOM_SEED

# Seed NumPy's global random generator with the configured RANDOM_SEED
# (presumably to make test runs reproducible).
np.random.seed(seed=RANDOM_SEED)


class TestPredict(unittest.TestCase):
    @staticmethod
    def _predict_log_probabilities_one_step(nn_model, x_batch, y_batch):
        """
        Predict answers for every sequence token by token until EOS_TOKEN occurred in the sequence using sampling with temperature.
        All the rest of the sequence is filled with PAD_TOKENs.
        """
        thought_vectors_batch = nn_model.get_thought_vectors(x_batch)
        hidden_states_batch = np.zeros((x_batch.shape[0], DECODER_DEPTH, HIDDEN_LAYER_DIMENSION), dtype=np.float32)

        total_log_probs = np.zeros((y_batch.shape[0], y_batch.shape[1] - 1, nn_model.vocab_size))