def __init__(self, data_dir, model_dir, task_id, isInteractive=True, OOV=False, memory_size=50, random_state=None,
                 batch_size=32, learning_rate=0.001, epsilon=1e-8, max_grad_norm=40.0, evaluation_interval=10, hops=3,
                 epochs=200, embedding_size=20, intro_times=20):
        self.data_dir = data_dir
        self.task_id = task_id
        self.model_dir = model_dir
        self.isInteractive = isInteractive
        self.OOV = OOV
        self.memory_size = memory_size
        self.random_state = random_state
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.max_grad_norm = max_grad_norm
        self.evaluation_interval = evaluation_interval
        self.hops = hops
        self.epochs = epochs
        self.embedding_size = embedding_size
        self.intro_times = intro_times

        candidates, self.candid2indx = load_candidates(
            self.data_dir, self.task_id)
        self.n_cand = len(candidates)
        print("Candidate Size", self.n_cand)
        # Inverse mapping: candidate index -> candidate text
        self.indx2candid = {idx: cand for cand, idx in self.candid2indx.items()}
        # task data
        self.trainData, self.testData, self.valData = load_dialog_task(
            self.data_dir, self.task_id, self.candid2indx, self.OOV)
        data = self.trainData + self.testData + self.valData

        self.build_vocab(data, candidates)
        # Compare the train/validation word set against the full word set
        # to count how many test-only (OOV) words exist
        self.train_val_wordset = self.words_set(self.valData + self.trainData)
        all_wordset = self.words_set(data)
        no_oov_word = len(self.train_val_wordset)
        with_oov_word = len(all_wordset)
        print('OOV words:', with_oov_word - no_oov_word)
        # Alternative: self.candidates_vec = vectorize_candidates_sparse(candidates, self.word_idx)
        self.candidates_vec = vectorize_candidates(
            candidates, self.word_idx, self.candidate_sentence_size)
        optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate, epsilon=self.epsilon)
        self.sess = tf.Session()
        self.model = MemN2NDialog(self.batch_size, self.vocab_size, self.n_cand, self.sentence_size,
                                  self.embedding_size, self.candidates_vec, session=self.sess,
                                  hops=self.hops, max_grad_norm=self.max_grad_norm, optimizer=optimizer,
                                  task_id=task_id, introspection_times=self.intro_times)
        self.saver = tf.train.Saver(max_to_keep=1)

        self.summary_writer = tf.summary.FileWriter(
            self.model.root_dir, self.model.graph_output.graph)
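
A quick aside on the candid2indx / indx2candid pair built above: it is a plain dictionary inversion. A minimal, self-contained sketch with a made-up mapping (the real one comes from load_candidates) behaves like this:

# Hypothetical toy mapping; load_candidates() builds the real one.
candid2indx = {"api_call italian rome": 0, "hello, how can i help you?": 1}
indx2candid = {idx: cand for cand, idx in candid2indx.items()}
assert indx2candid[1] == "hello, how can i help you?"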
Example #2
    def __init__(self, data_dir, model_dir, task_id, isInteractive=True, OOV=False, memory_size=50, random_state=None,
                 batch_size=32, learning_rate=0.001, epsilon=1e-8, max_grad_norm=40.0, evaluation_interval=10, hops=3,
                 epochs=200, embedding_size=20):
        self.data_dir = data_dir
        self.task_id = task_id
        self.model_dir = model_dir
        self.isInteractive = isInteractive
        self.OOV = OOV
        self.memory_size = memory_size
        self.random_state = random_state
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.max_grad_norm = max_grad_norm
        self.evaluation_interval = evaluation_interval
        self.hops = hops
        self.epochs = epochs
        self.embedding_size = embedding_size

        candidates, self.candid2indx = load_candidates(
            self.data_dir, self.task_id)
        self.n_cand = len(candidates)
        print("Candidate Size", self.n_cand)
        self.indx2candid = {idx: cand for cand, idx in self.candid2indx.items()}
        # task data
        self.trainData, self.testData, self.valData = load_dialog_task(
            self.data_dir, self.task_id, self.candid2indx, self.OOV)
        data = self.trainData + self.testData + self.valData
        self.build_vocab(data, candidates)
        # Alternative: self.candidates_vec = vectorize_candidates_sparse(candidates, self.word_idx)
        self.candidates_vec = vectorize_candidates(
            candidates, self.word_idx, self.candidate_sentence_size)
        optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate, epsilon=self.epsilon)
        self.sess = tf.Session()
        self.model = MemN2NDialog(self.batch_size, self.vocab_size, self.n_cand, self.sentence_size,
                                  self.embedding_size, self.candidates_vec, session=self.sess,
                                  hops=self.hops, max_grad_norm=self.max_grad_norm, optimizer=optimizer,
                                  task_id=task_id)
        self.saver = tf.train.Saver(max_to_keep=50)

        self.summary_writer = tf.summary.FileWriter(
            self.model.root_dir, self.model.graph_output.graph)
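
Note that max_grad_norm is only handed through to MemN2NDialog, where the actual gradient clipping presumably happens. The model's internals are not shown in this listing, but a typical TF 1.x clip-by-norm training op, sketched under that assumption with a toy variable and loss, looks like this:

import tensorflow as tf  # TF 1.x API, as used throughout these examples

w = tf.Variable([2.0, -3.0])
loss = tf.reduce_sum(tf.square(w))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001, epsilon=1e-8)
grads_and_vars = optimizer.compute_gradients(loss)
# Clip each gradient's L2 norm to the max_grad_norm bound before applying it
clipped = [(tf.clip_by_norm(g, 40.0), v) for g, v in grads_and_vars]
train_op = optimizer.apply_gradients(clipped)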
Example #3
    def __init__(self,
                 data_dir,
                 model_dir,
                 task_id,
                 OOV=False,
                 memory_size=250,
                 random_state=None,
                 batch_size=32,
                 learning_rate=0.001,
                 epsilon=1e-8,
                 max_grad_norm=40.0,
                 evaluation_interval=10,
                 hops=3,
                 epochs=10,
                 embedding_size=20,
                 save_vocab=False,
                 load_vocab=False):
        """Creates wrapper for training and testing a chatbot model.

        Args:
            data_dir: Directory containing personalized dialog tasks.

            model_dir: Directory containing memn2n model checkpoints.

            task_id: Personalized dialog task id, 1 <= id <= 5.

            OOV: If `True`, use the OOV test set. Defaults to `False`.

            memory_size: The max size of the memory. Defaults to `250`.

            random_state: Random state to set graph-level random seed. Defaults to `None`.

            batch_size: Size of the batch for training. Defaults to `32`.

            learning_rate: Learning rate for Adam Optimizer. Defaults to `0.001`.

            epsilon: Epsilon value for Adam Optimizer. Defaults to `1e-8`.

            max_grad_norm: Maximum L2 norm clipping value. Defaults to `40.0`.

            evaluation_interval: Evaluate and print results every x epochs.
            Defaults to `10`.

            hops: The number of hops over memory for responding. A hop consists
            of reading and addressing a memory slot. Defaults to `3`.

            epochs: Number of training epochs. Defaults to `10`.

            embedding_size: The size of the word embedding. Defaults to `20`.

            save_vocab: If `True`, save vocabulary file. Defaults to `False`.

            load_vocab: If `True`, load vocabulary from file. Defaults to `False`.
        """

        self.data_dir = data_dir
        self.task_id = task_id
        self.model_dir = model_dir
        self.OOV = OOV
        self.memory_size = memory_size
        self.random_state = random_state
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.max_grad_norm = max_grad_norm
        self.evaluation_interval = evaluation_interval
        self.hops = hops
        self.epochs = epochs
        self.embedding_size = embedding_size
        self.save_vocab = save_vocab
        self.load_vocab = load_vocab

        candidates, self.candid2indx = load_candidates(self.data_dir,
                                                       self.task_id)
        self.n_cand = len(candidates)
        # print("Candidate Size", self.n_cand)
        self.indx2candid = {idx: cand for cand, idx in self.candid2indx.items()}

        # Task data
        self.trainData, self.testData, self.valData = load_dialog_task(
            self.data_dir, self.task_id, self.candid2indx, self.OOV)

        data = self.trainData + self.testData + self.valData

        self.build_vocab(data, candidates, self.save_vocab, self.load_vocab)
        self.candidates_vec = vectorize_candidates(
            candidates, self.word_idx, self.candidate_sentence_size)
        print("candidates_vec", self.candidates_vec)
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate,
                                           epsilon=self.epsilon)

        self.sess = tf.Session()

        self.model = MemN2NDialog(self.batch_size,
                                  self.vocab_size,
                                  self.n_cand,
                                  self.sentence_size,
                                  self.embedding_size,
                                  self.candidates_vec,
                                  session=self.sess,
                                  hops=self.hops,
                                  max_grad_norm=self.max_grad_norm,
                                  optimizer=optimizer,
                                  task_id=task_id)

        self.saver = tf.train.Saver(max_to_keep=50)
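
Example #3 adds save_vocab / load_vocab flags to build_vocab. That method's implementation is not shown here, but persisting a vocabulary is typically just a serialization round-trip of the word-to-index dictionary; a minimal sketch under that assumption:

import pickle

# Hypothetical vocabulary round-trip; the real build_vocab() is not shown.
word_idx = {"hello": 1, "world": 2}
with open("vocab.pkl", "wb") as f:
    pickle.dump(word_idx, f)
with open("vocab.pkl", "rb") as f:
    assert pickle.load(f) == word_idx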
Example #4
    def __init__(self,
                 data_dir,
                 model_dir,
                 task_id,
                 source,
                 resFlag,
                 wrong_conversations,
                 error,
                 acc_each_epoch,
                 acc_ten_epoch,
                 conv_wrong_right,
                 epochs,
                 OOV=False,
                 memory_size=50,
                 random_state=None,
                 batch_size=32,
                 learning_rate=0.001,
                 epsilon=1e-8,
                 max_grad_norm=40.0,
                 evaluation_interval=10,
                 hops=3,
                 embedding_size=20):
        self.data_dir = data_dir
        self.task_id = task_id
        self.model_dir = model_dir
        self.OOV = OOV
        self.memory_size = memory_size
        self.random_state = random_state
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.max_grad_norm = max_grad_norm
        self.evaluation_interval = evaluation_interval
        self.hops = hops
        self.epochs = epochs
        self.embedding_size = embedding_size
        self.source = source
        self.resFlag = resFlag
        self.wrong_conversations = wrong_conversations
        self.error = error
        self.acc_each_epoch = acc_each_epoch
        self.acc_ten_epoch = acc_ten_epoch
        self.conv_wrong_right = conv_wrong_right
        candidates, self.candid2indx = load_candidates(self.data_dir,
                                                       self.task_id)
        self.n_cand = len(candidates)
        print("Candidate Size", self.n_cand)
        self.indx2candid = {idx: cand for cand, idx in self.candid2indx.items()}

        # create train, test and validation data
        self.trainData, self.testData, self.valData = load_dialog_task(
            self.data_dir, self.task_id, self.candid2indx, self.OOV)
        data = self.trainData + self.testData + self.valData
        self.build_vocab(data, candidates)

        self.test_acc_list = []
        self.candidates_vec = vectorize_candidates(
            candidates, self.word_idx, self.candidate_sentence_size)
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate,
                                           epsilon=self.epsilon)
        self.sess = tf.Session()
        self.model = MemN2NDialog(self.batch_size,
                                  self.vocab_size,
                                  self.n_cand,
                                  self.sentence_size,
                                  self.embedding_size,
                                  self.candidates_vec,
                                  session=self.sess,
                                  hops=self.hops,
                                  max_grad_norm=self.max_grad_norm,
                                  optimizer=optimizer,
                                  task_id=task_id,
                                  source=self.source,
                                  resFlag=self.resFlag,
                                  oov=self.OOV)
        self.saver = tf.train.Saver(max_to_keep=50)
        self.summary_writer = tf.summary.FileWriter(
            self.model.root_dir, self.model.graph_output.graph)
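
Example #4 threads accuracy bookkeeping (acc_each_epoch, acc_ten_epoch) through the constructor alongside evaluation_interval. A stand-alone sketch of that pattern, with stand-in accuracy values rather than real measurements:

# Record accuracy every epoch; evaluate/print every `evaluation_interval` epochs.
evaluation_interval = 10
acc_each_epoch, acc_ten_epoch = [], []
for epoch in range(1, 41):
    acc = min(0.5 + 0.01 * epoch, 0.95)  # stand-in for a real validation run
    acc_each_epoch.append(acc)
    if epoch % evaluation_interval == 0:
        acc_ten_epoch.append(acc)
        print("epoch", epoch, "val acc", acc)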
Example #5
    def __init__(self,
                 data_dir,
                 model_dir,
                 task_id,
                 isInteractive=True,
                 OOV=False,
                 memory_size=250,
                 random_state=None,
                 batch_size=32,
                 learning_rate=0.001,
                 epsilon=1e-8,
                 max_grad_norm=40.0,
                 evaluation_interval=10,
                 hops=3,
                 epochs=200,
                 embedding_size=20,
                 alpha=0.5,
                 save_vocab=None,
                 load_vocab=None,
                 verbose=False,
                 load_profiles=None,
                 save_profiles=None):

        self.data_dir = data_dir
        self.task_id = task_id
        self.model_dir = model_dir
        self.isInteractive = isInteractive
        self.OOV = OOV
        self.memory_size = memory_size
        self.random_state = random_state
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.max_grad_norm = max_grad_norm
        self.evaluation_interval = evaluation_interval
        self.hops = hops
        self.epochs = epochs
        self.embedding_size = embedding_size
        self.save_vocab = save_vocab
        self.load_vocab = load_vocab
        self.verbose = verbose
        self.alpha = alpha

        # Loading possible answers
        self.candidates, self.candid2indx = load_candidates(
            self.data_dir, self.task_id)
        self.n_cand = len(self.candidates)
        print("Candidate Size", self.n_cand)
        self.indx2candid = {idx: cand for cand, idx in self.candid2indx.items()}

        # task data
        self.trainData, self.testData, self.valData = load_dialog_task(
            self.data_dir, self.task_id, self.candid2indx, self.OOV)
        data = self.trainData + self.testData + self.valData

        # Find profiles types
        if load_profiles:
            with open(load_profiles, 'rb') as f:
                self._profiles_mapping = pickle.load(f)
        else:
            self._profiles_mapping = generate_profile_encoding(self.trainData)
            if save_profiles:
                with open(save_profiles, 'wb') as f:
                    pickle.dump(self._profiles_mapping, f)

        profiles_idx_set = set(self._profiles_mapping.values())

        print("Profiles:", self._profiles_mapping)

        # Vocabulary
        self.build_vocab(data, self.candidates, self.save_vocab,
                         self.load_vocab)
        # Alternative: self.candidates_vec = vectorize_candidates_sparse(self.candidates, self.word_idx)
        self.candidates_vec = vectorize_candidates(
            self.candidates, self.word_idx, self.candidate_sentence_size)

        # Model initialisation
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate,
                                           epsilon=self.epsilon)
        self.sess = tf.Session()
        self.model = MemN2NDialog(self.batch_size,
                                  self.vocab_size,
                                  self.n_cand,
                                  self.sentence_size,
                                  self.embedding_size,
                                  self.candidates_vec,
                                  profiles_idx_set,
                                  session=self.sess,
                                  hops=self.hops,
                                  max_grad_norm=self.max_grad_norm,
                                  alpha=alpha,
                                  optimizer=optimizer,
                                  task_id=task_id,
                                  verbose=verbose)
        self.saver = tf.train.Saver(max_to_keep=50)

        # tf.train.SummaryWriter is the pre-TF-1.0 spelling of tf.summary.FileWriter
        self.summary_writer = tf.summary.FileWriter(
            self.model.root_dir, self.model.graph_output.graph)
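
Example #5's generate_profile_encoding is not shown either; given that its values are collected into profiles_idx_set and passed to MemN2NDialog as an index set, a plausible shape is a stable string-to-integer id mapping. This is a guess at the helper's behavior, not its actual code:

def generate_profile_encoding_sketch(profile_strings):
    # Assign each distinct profile string a small, stable integer id.
    mapping = {}
    for p in profile_strings:
        if p not in mapping:
            mapping[p] = len(mapping)
    return mapping

print(generate_profile_encoding_sketch(["male young", "female elderly", "male young"]))
# -> {'male young': 0, 'female elderly': 1}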