Example #1
    def _convert_batch(self, py_batch, prev_z_py=None):
        u_input_py = py_batch['user']
        u_len_py = py_batch['u_len']
        kw_ret = {}
        if cfg.prev_z_method == 'concat' and prev_z_py is not None:
            # Prepend the previous turn's belief span (cut at EOS_Z2) to the user input.
            eob = self.reader.vocab.encode('EOS_Z2')
            for i in range(len(u_input_py)):
                if eob in prev_z_py[i] and prev_z_py[i].index(eob) != len(prev_z_py[i]) - 1:
                    idx = prev_z_py[i].index(eob)
                    u_input_py[i] = prev_z_py[i][:idx + 1] + u_input_py[i]
                else:
                    u_input_py[i] = prev_z_py[i] + u_input_py[i]
                u_len_py[i] = len(u_input_py[i])
                # Map copied ids beyond the base vocabulary back to <unk>.
                for j, word in enumerate(prev_z_py[i]):
                    if word >= cfg.vocab_size:
                        prev_z_py[i][j] = 2  # unk
        elif cfg.prev_z_method == 'separate' and prev_z_py is not None:
            # Keep the previous belief span as a separate input to the encoder.
            eob = self.reader.vocab.encode('EOS_Z2')
            for i in range(len(prev_z_py)):
                if eob in prev_z_py[i] and prev_z_py[i].index(eob) != len(prev_z_py[i]) - 1:
                    idx = prev_z_py[i].index(eob)
                    prev_z_py[i] = prev_z_py[i][:idx + 1]
                # Map copied ids beyond the base vocabulary back to <unk>.
                for j, word in enumerate(prev_z_py[i]):
                    if word >= cfg.vocab_size:
                        prev_z_py[i][j] = 2  # unk
            prev_z_input_np = pad_sequences(prev_z_py, cfg.max_ts, padding='post', truncating='pre').transpose((1, 0))
            prev_z_len = np.array([len(_) for _ in prev_z_py])
            prev_z_input = cuda_(Variable(torch.from_numpy(prev_z_input_np).long()))
            kw_ret['prev_z_len'] = prev_z_len
            kw_ret['prev_z_input'] = prev_z_input
            kw_ret['prev_z_input_np'] = prev_z_input_np

        degree_input_np = np.array(py_batch['degree'])
        # pad_sequences yields batch-major (B, T); transpose to time-major (T, B).
        u_input_np = pad_sequences(u_input_py, cfg.max_ts, padding='post', truncating='pre').transpose((1, 0))
        z_input_np = pad_sequences(py_batch['bspan'], padding='post').transpose((1, 0))
        m_input_np = pad_sequences(py_batch['response'], cfg.max_ts, padding='post',
                                   truncating='post').transpose((1, 0))

        u_len = np.array(u_len_py)
        m_len = np.array(py_batch['m_len'])

        degree_input = cuda_(Variable(torch.from_numpy(degree_input_np).float()))
        u_input = cuda_(Variable(torch.from_numpy(u_input_np).long()))
        z_input = cuda_(Variable(torch.from_numpy(z_input_np).long()))
        m_input = cuda_(Variable(torch.from_numpy(m_input_np).long()))

        kw_ret['z_input_np'] = z_input_np

        return u_input, u_input_np, z_input, m_input, m_input_np, u_len, m_len, \
               degree_input, kw_ret
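
The repeated pad_sequences(...).transpose((1, 0)) idiom above turns batch-major padded arrays into the time-major (T, B) layout the recurrent encoders expect. Below is a minimal shape check, using a hypothetical standalone pad_sequences written to mirror the Keras-style signature the code assumes:

import numpy as np

def pad_sequences(seqs, maxlen=None, padding='post', truncating='pre'):
    # Hypothetical stand-in mirroring the helper's Keras-style behaviour.
    maxlen = maxlen or max(len(s) for s in seqs)
    out = np.zeros((len(seqs), maxlen), dtype=np.int64)
    for i, s in enumerate(seqs):
        s = s[-maxlen:] if truncating == 'pre' else s[:maxlen]
        if padding == 'post':
            out[i, :len(s)] = s
        else:
            out[i, -len(s):] = s
    return out

batch = [[4, 5, 6], [7, 8]]
print(pad_sequences(batch, 5).shape)                    # (2, 5): batch-major
print(pad_sequences(batch, 5).transpose((1, 0)).shape)  # (5, 2): time-major (T, B)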
Example #2
 def predict(self, usr):
     print('usr:', usr)
     usr = word_tokenize(usr.lower())
     usr_words = usr + ['EOS_U']
     u_len = np.array([len(usr_words)])
     usr_indices = self.m.reader.vocab.sentence_encode(usr_words)
     u_input_np = np.array(usr_indices)[:, np.newaxis]
     u_input = cuda_(Variable(torch.from_numpy(u_input_np).long()))
     m_idx, z_idx, degree = self.m.m(mode='test',
                                     degree_input=None,
                                     z_input=None,
                                     u_input=u_input,
                                     u_input_np=u_input_np,
                                     u_len=u_len,
                                     m_input=None,
                                     m_input_np=None,
                                     m_len=None,
                                     turn_states=None,
                                     **self.kw_ret)
     venue = random.sample(degree, 1)[0] if degree else dict()
     l = [self.m.reader.vocab.decode(_) for _ in m_idx[0]]
     if 'EOS_M' in l:
         l = l[:l.index('EOS_M')]
     l_origin = []
     for word in l:
         if 'SLOT' in word:
             word = word[:-5]  # strip the '_SLOT' suffix to recover the slot name
             if word in venue.keys():
                 value = venue[word]
                 if value != '?':
                     l_origin.append(value)
         elif word.endswith('reference]'):
             if 'ref' in venue:
                 l_origin.append(venue['ref'])
         else:
             l_origin.append(word)
     sys = ' '.join(l_origin)
     sys = denormalize(sys)
     print('sys:', sys)
     if cfg.prev_z_method == 'separate':
         # Cache this turn's belief span (cut at EOS_Z2) as input for the next turn.
         eob = self.m.reader.vocab.encode('EOS_Z2')
         if eob in z_idx[0] and z_idx[0].index(eob) != len(z_idx[0]) - 1:
             idx = z_idx[0].index(eob)
             z_idx[0] = z_idx[0][:idx + 1]
         for j, word in enumerate(z_idx[0]):
             if word >= cfg.vocab_size:
             z_idx[0][j] = 2  # unk
         prev_z_input_np = pad_sequences(z_idx,
                                         cfg.max_ts,
                                         padding='post',
                                         truncating='pre').transpose((1, 0))
         prev_z_len = np.array([len(_) for _ in z_idx])
         prev_z_input = cuda_(
             Variable(torch.from_numpy(prev_z_input_np).long()))
         self.kw_ret['prev_z_len'] = prev_z_len
         self.kw_ret['prev_z_input'] = prev_z_input
         self.kw_ret['prev_z_input_np'] = prev_z_input_np
     return sys
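
The EOS_Z2 handling above (repeated in Examples #1, #4, and #5) trims any tokens the decoder emitted after the end-of-belief-span marker, keeping the marker itself. A tiny trace with illustrative token ids:

eob = 5                  # pretend vocab.encode('EOS_Z2') == 5
z = [10, 11, 5, 12, 13]  # spurious tail after EOS_Z2
if eob in z and z.index(eob) != len(z) - 1:
    z = z[:z.index(eob) + 1]
print(z)  # [10, 11, 5]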
Example #3
 def greedy_decode(self, pz_dec_outs, u_enc_out, m_tm1, u_input_np,
                   last_hidden, degree_input, bspan_index):
     decoded = []
     bspan_index_np = pad_sequences(bspan_index).transpose((1, 0))
     for t in range(self.max_ts):
         proba, last_hidden, _ = self.m_decoder(pz_dec_outs, u_enc_out,
                                                u_input_np, m_tm1,
                                                degree_input, last_hidden,
                                                bspan_index_np)
         # Drop column 2 (<unk>) so it can never win the argmax...
         proba = torch.cat((proba[:, :2], proba[:, 3:]), 1)
         mt_proba, mt_index = torch.topk(proba, 1)  # [B, 1]
         # ...then shift the surviving indices >= 2 back to vocabulary ids.
         mt_index.add_(mt_index.ge(2).long())
         mt_index = mt_index.data.view(-1)
         decoded.append(mt_index.clone())
         # Feed copied/OOV ids back into the decoder as <unk>.
         for i in range(mt_index.size(0)):
             if mt_index[i] >= cfg.vocab_size:
                 mt_index[i] = 2  # unk
         m_tm1 = cuda_(Variable(mt_index).view(1, -1))
     decoded = torch.stack(decoded, dim=0).transpose(0, 1)  # (T, B) -> (B, T)
     decoded = list(decoded)
     return [list(_) for _ in decoded]
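
The cat/add_ pair above implements greedy decoding that never emits <unk>: column 2 is removed before topk, then the surviving indices at or above 2 are shifted back up by one to recover the original vocabulary ids. A small check of that index arithmetic:

import torch

proba = torch.tensor([[0.1, 0.2, 0.9, 0.3, 0.4]])   # id 2 (<unk>) scores highest
proba = torch.cat((proba[:, :2], proba[:, 3:]), 1)  # drop column 2 before topk
_, mt_index = torch.topk(proba, 1)
mt_index.add_(mt_index.ge(2).long())                # restore original vocabulary ids
print(mt_index)  # tensor([[4]]): the best-scoring non-<unk> token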
Example #4
    def predict(self, usr, kw_ret):
        def z2degree(gen_z):
            # Decode the generated belief span, query the DB, and build the
            # one-hot match-count ("degree") vector fed to the decoder.
            gen_bspan = self.reader.vocab.sentence_decode(gen_z, eos='EOS_Z2')
            constraint_request = gen_bspan.split()
            constraints = constraint_request[:constraint_request.index('EOS_Z1')] if 'EOS_Z1' \
                in constraint_request else constraint_request
            for j, ent in enumerate(constraints):
                constraints[j] = ent.replace('_', ' ')
            degree = self.reader.db_search(constraints)
            degree_input_list = self.reader._degree_vec_mapping(len(degree))
            degree_input = cuda_(
                Variable(torch.Tensor(degree_input_list).unsqueeze(0)))
            return degree, degree_input

        self.m.eval()

        kw_ret['func'] = z2degree
        if 'prev_z_input_np' in kw_ret:
            # Rebuild arrays and tensors from the JSON-serializable state.
            kw_ret['prev_z_len'] = np.array(kw_ret['prev_z_len'])
            kw_ret['prev_z_input_np'] = np.array(kw_ret['prev_z_input_np'])
            kw_ret['prev_z_input'] = cuda_(
                Variable(torch.Tensor(kw_ret['prev_z_input_np']).long()))

        usr = word_tokenize(usr.lower())

        usr_words = usr + ['EOS_U']
        u_len = np.array([len(usr_words)])
        usr_indices = self.reader.vocab.sentence_encode(usr_words)
        u_input_np = np.array(usr_indices)[:, np.newaxis]
        u_input = cuda_(Variable(torch.from_numpy(u_input_np).long()))
        m_idx, z_idx, degree = self.m(mode='test',
                                      degree_input=None,
                                      z_input=None,
                                      u_input=u_input,
                                      u_input_np=u_input_np,
                                      u_len=u_len,
                                      m_input=None,
                                      m_input_np=None,
                                      m_len=None,
                                      turn_states=None,
                                      **kw_ret)
        venue = random.sample(degree, 1)[0] if degree else dict()
        l = [self.reader.vocab.decode(_) for _ in m_idx[0]]
        if 'EOS_M' in l:
            l = l[:l.index('EOS_M')]
        l_origin = []
        for word in l:
            if 'SLOT' in word:
                word = word[:-5]  # strip the '_SLOT' suffix to recover the slot name
                if word in venue.keys():
                    value = venue[word]
                    if value != '?':
                        l_origin.append(value.replace(' ', '_'))
            else:
                l_origin.append(word)
        sys = ' '.join(l_origin)
        kw_ret['sys'] = sys
        if cfg.prev_z_method == 'separate':
            eob = self.reader.vocab.encode('EOS_Z2')
            if eob in z_idx[0] and z_idx[0].index(eob) != len(z_idx[0]) - 1:
                idx = z_idx[0].index(eob)
                z_idx[0] = z_idx[0][:idx + 1]
            for j, word in enumerate(z_idx[0]):
                if word >= cfg.vocab_size:
                    z_idx[0][j] = 2  # unk
            prev_z_input_np = pad_sequences(z_idx,
                                            cfg.max_ts,
                                            padding='post',
                                            truncating='pre').transpose((1, 0))
            prev_z_len = np.array([len(_) for _ in z_idx])
            # Store lists rather than ndarrays so the state stays JSON-serializable.
            kw_ret['prev_z_len'] = prev_z_len.tolist()
            kw_ret['prev_z_input_np'] = prev_z_input_np.tolist()
            if 'prev_z_input' in kw_ret:
                del kw_ret['prev_z_input']

        # Drop the callable for the same reason before returning the state.
        del kw_ret['func']

        return kw_ret
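
Unlike Example #2, this variant keeps the whole dialogue state in the JSON-serializable kw_ret it returns, which suits a stateless server. A hypothetical multi-turn driver (the model handle and utterances are illustrative):

kw_ret = {}  # empty state on the first turn
for usr in ['i want a cheap restaurant in the north', 'what is the address ?']:
    kw_ret = model.predict(usr, kw_ret)  # `model` is assumed to expose the predict() above
    print(kw_ret['sys'])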
Example #5
    def interact(self):
        def z2degree(gen_z):
            # Decode the generated belief span, query the DB, and build the
            # one-hot match-count ("degree") vector fed to the decoder.
            gen_bspan = self.reader.vocab.sentence_decode(gen_z, eos='EOS_Z2')
            constraint_request = gen_bspan.split()
            constraints = constraint_request[:constraint_request.index('EOS_Z1')] if 'EOS_Z1' \
                in constraint_request else constraint_request
            for j, ent in enumerate(constraints):
                constraints[j] = ent.replace('_', ' ')
            degree = self.reader.db_search(constraints)
            degree_input_list = self.reader._degree_vec_mapping(len(degree))
            degree_input = cuda_(
                Variable(torch.Tensor(degree_input_list).unsqueeze(0)))
            return degree, degree_input

        def denormalize(uttr):
            uttr = uttr.replace(' -s', 's')
            uttr = uttr.replace(' -ly', 'ly')
            uttr = uttr.replace(' -er', 'er')
            return uttr

        self.m.eval()
        print('Start interaction.')
        kw_ret = dict({'func': z2degree})
        while True:
            usr = input('usr: ')
            if usr == 'END':
                break
            if usr == 'RESET':
                kw_ret = dict({'func': z2degree})
                continue
            usr = word_tokenize(usr.lower())
            usr_words = usr + ['EOS_U']
            u_len = np.array([len(usr_words)])
            usr_indices = self.reader.vocab.sentence_encode(usr_words)
            u_input_np = np.array(usr_indices)[:, np.newaxis]
            u_input = cuda_(Variable(torch.from_numpy(u_input_np).long()))
            m_idx, z_idx, degree = self.m(mode='test',
                                          degree_input=None,
                                          z_input=None,
                                          u_input=u_input,
                                          u_input_np=u_input_np,
                                          u_len=u_len,
                                          m_input=None,
                                          m_input_np=None,
                                          m_len=None,
                                          turn_states=None,
                                          **kw_ret)
            venue = random.sample(degree, 1)[0] if degree else dict()
            l = [self.reader.vocab.decode(_) for _ in m_idx[0]]
            if 'EOS_M' in l:
                l = l[:l.index('EOS_M')]
            l_origin = []
            for word in l:
                if 'SLOT' in word:
                    word = word[:-5]  # strip the '_SLOT' suffix to recover the slot name
                    if word in venue.keys():
                        value = venue[word]
                        if value != '?':
                            l_origin.append(value)
                else:
                    l_origin.append(word)
            sys = ' '.join(l_origin)
            sys = denormalize(sys)
            print('sys:', sys)
            if cfg.prev_z_method == 'separate':
                eob = self.reader.vocab.encode('EOS_Z2')
                if eob in z_idx[0] and z_idx[0].index(eob) != len(z_idx[0]) - 1:
                    idx = z_idx[0].index(eob)
                    z_idx[0] = z_idx[0][:idx + 1]
                for j, word in enumerate(z_idx[0]):
                    if word >= cfg.vocab_size:
                        z_idx[0][j] = 2  # unk
                prev_z_input_np = pad_sequences(z_idx,
                                                cfg.max_ts,
                                                padding='post',
                                                truncating='pre').transpose(
                                                    (1, 0))
                prev_z_len = np.array([len(_) for _ in z_idx])
                prev_z_input = cuda_(
                    Variable(torch.from_numpy(prev_z_input_np).long()))
                kw_ret['prev_z_len'] = prev_z_len
                kw_ret['prev_z_input'] = prev_z_input
                kw_ret['prev_z_input_np'] = prev_z_input_np
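
For reference, the nested denormalize above undoes the tokenizer's suffix splitting before the response is printed; a standalone copy with a usage example:

def denormalize(uttr):
    uttr = uttr.replace(' -s', 's')
    uttr = uttr.replace(' -ly', 'ly')
    uttr = uttr.replace(' -er', 'er')
    return uttr

print(denormalize('there are 2 moderate -ly priced restaurant -s'))
# there are 2 moderately priced restaurants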