Exemplo n.º 1
0
    def _convert_batch_para(self, py_batch, mode, prev_a_py=None):
        """Turn a paraphrase batch into padded index arrays and tensors.

        Args:
            py_batch: mapping that provides the keys 'delex_user',
                'delex_para', 'delex_u_len' and 'pre_dial_act'.
            mode: when equal to 'test' and ``prev_a_py`` is truthy, the
                previous dialogue acts are taken from ``prev_a_py``;
                in every other case they come from
                ``py_batch['pre_dial_act']``.
            prev_a_py: optional list of index lists. When it is used, its
                elements are replaced in place by their cleaned versions.

        Returns:
            Tuple ``(u_input, u_input_np, delex_para_input,
            delex_para_input_np, u_len, prev_dial_act_input)``. The ``*_np``
            values are the padded arrays after ``transpose((1, 0))``; the
            others (except ``u_len``) are the matching long tensors passed
            through ``cuda_``.
        """
        def _pad_to_tensor(seqs, max_len):
            # Shared pad -> transpose -> tensor pipeline for every input.
            arr = pad_sequences(seqs, max_len, padding='post',
                                truncating='pre').transpose((1, 0))
            return arr, cuda_(Variable(torch.from_numpy(arr).long()))

        u_input_np, u_input = _pad_to_tensor(py_batch['delex_user'],
                                             cfg.max_para_len)
        delex_para_input_np, delex_para_input = _pad_to_tensor(
            py_batch['delex_para'], cfg.max_para_len)
        u_len = np.array(py_batch['delex_u_len'])

        if mode == 'test' and prev_a_py:
            # The end-of-act index is the same for every sequence.
            eob = self.reader.vocab.encode('EOS_A')
            for i, act in enumerate(prev_a_py):
                if eob in act and act.index(eob) != len(act) - 1:
                    # Keep everything up to and including the first EOS_A.
                    kept = act[:act.index(eob) + 1]
                else:
                    # NOTE(review): this fallback also fires when the first
                    # EOS_A is already the last token, discarding the whole
                    # sequence -- confirm that this is intended.
                    kept = [eob]
                # Indices outside [0, cfg.vocab_size) become 2 (unk).
                prev_a_py[i] = [
                    2 if (word >= cfg.vocab_size or word < 0) else word
                    for word in kept
                ]
        else:
            prev_a_py = py_batch['pre_dial_act']

        _, prev_dial_act_input = _pad_to_tensor(prev_a_py, cfg.a_length)

        return u_input, u_input_np, delex_para_input, delex_para_input_np, u_len, prev_dial_act_input
Exemplo n.º 2
0
    def _convert_batch(self, py_batch, prev_z_py=None, mode="train"):
        """Turn one turn batch into padded index arrays and tensors.

        Args:
            py_batch: mapping that provides 'domain', 'degree', 'bspan',
                'response', 'm_len' and either 'final_user'/'final_u_len'
                (``mode == "train"``) or 'user'/'u_len' (any other mode).
            prev_z_py: optional list of index lists for the previous belief
                span. Its contents are cleaned in place. It is only used
                when ``cfg.prev_z_method`` is 'concat' or 'separate'.
            mode: selects which user-utterance keys are read (see above).

        Returns:
            Tuple ``(u_input, u_input_np, z_input, m_input, m_input_np,
            u_len, m_len, degree_input, kw_ret, domain)``. ``kw_ret`` always
            holds 'z_input_np'; with the 'separate' method it also holds
            'prev_z_len', 'prev_z_input' and 'prev_z_input_np'.

        Note:
            With the 'concat' method the user-utterance and length lists
            taken from ``py_batch`` are updated in place.
        """
        def _clamp_to_vocab(seq):
            # Overwrite indices outside [0, cfg.vocab_size) with 2 (unk).
            for pos, word in enumerate(seq):
                if word >= cfg.vocab_size or word < 0:
                    seq[pos] = 2  # unk

        def _keep_len(seq, eos):
            # Length of the prefix ending at the first `eos`; the whole
            # sequence when `eos` is absent (or already the last token).
            return seq.index(eos) + 1 if eos in seq else len(seq)

        domain = py_batch['domain']
        if mode == "train":
            u_input_py = py_batch['final_user']
            u_len_py = py_batch['final_u_len']
        else:
            u_input_py = py_batch['user']
            u_len_py = py_batch['u_len']
        kw_ret = {}

        if cfg.prev_z_method == 'concat' and prev_z_py is not None:
            eob = self.reader.vocab.encode('EOS_Z2')
            for i, utterance in enumerate(u_input_py):
                prev_z = prev_z_py[i]
                # Find the cut point on the raw indices, then clamp BEFORE
                # copying into the user input; clamping afterwards would
                # leave out-of-vocabulary indices in u_input.
                keep = _keep_len(prev_z, eob)
                _clamp_to_vocab(prev_z)
                u_input_py[i] = prev_z[:keep] + utterance
                u_len_py[i] = len(u_input_py[i])
        elif cfg.prev_z_method == 'separate' and prev_z_py is not None:
            eob = self.reader.vocab.encode('EOS_Z2')
            for i, prev_z in enumerate(prev_z_py):
                keep = _keep_len(prev_z, eob)
                if keep != len(prev_z):
                    # Drop everything generated after the first EOS_Z2.
                    prev_z = prev_z[:keep]
                    prev_z_py[i] = prev_z
                _clamp_to_vocab(prev_z)
            prev_z_input_np = pad_sequences(prev_z_py, cfg.max_ts, padding='post',
                                            truncating='pre').transpose((1, 0))
            prev_z_len = np.array([len(seq) for seq in prev_z_py])
            prev_z_input = cuda_(Variable(torch.from_numpy(prev_z_input_np).long()))
            kw_ret['prev_z_len'] = prev_z_len
            kw_ret['prev_z_input'] = prev_z_input
            kw_ret['prev_z_input_np'] = prev_z_input_np

        degree_input_np = np.array(py_batch['degree'])
        u_input_np = pad_sequences(u_input_py, cfg.max_ts, padding='post',
                                   truncating='pre').transpose((1, 0))
        # The belief span is padded to the longest sequence in the batch
        # (no explicit maximum length is passed).
        z_input_np = pad_sequences(py_batch['bspan'], padding='post').transpose((1, 0))
        m_input_np = pad_sequences(py_batch['response'], cfg.max_ts, padding='post',
                                   truncating='post').transpose((1, 0))

        u_len = np.array(u_len_py)
        m_len = np.array(py_batch['m_len'])

        degree_input = cuda_(Variable(torch.from_numpy(degree_input_np).float()))
        u_input = cuda_(Variable(torch.from_numpy(u_input_np).long()))
        z_input = cuda_(Variable(torch.from_numpy(z_input_np).long()))
        m_input = cuda_(Variable(torch.from_numpy(m_input_np).long()))

        kw_ret['z_input_np'] = z_input_np

        return u_input, u_input_np, z_input, m_input, m_input_np, u_len, m_len, degree_input, kw_ret, domain
Exemplo n.º 3
0
    def _convert_input(self, encoded_input):
        """Wrap a single encoded utterance as a batch of one.

        Args:
            encoded_input: sequence of token indices for one utterance.

        Returns:
            Tuple ``(u_input, u_input_np, u_len, degree_input)``:
            the padded utterance as a long tensor and as the underlying
            array (after ``transpose((1, 0))``), a one-element length array,
            and a float tensor built from
            ``self.reader._degree_vec_mapping(1)``.
        """
        padded = pad_sequences([encoded_input], cfg.max_ts,
                               padding='post', truncating='pre')
        u_input_np = padded.transpose((1, 0))
        token_tensor = torch.from_numpy(u_input_np).long()
        u_input = cuda_(Variable(token_tensor))

        u_len = np.array([len(encoded_input)])

        # The degree vector is always requested for a match count of 1.
        degree_input_np = np.array([self.reader._degree_vec_mapping(1)])
        degree_tensor = torch.from_numpy(degree_input_np).float()
        degree_input = cuda_(Variable(degree_tensor))

        return u_input, u_input_np, u_len, degree_input
Exemplo n.º 4
0
 def z2degree(gen_z):
     """Look up database matches for a generated belief span.

     ``self`` is not a parameter here; it has to be supplied by an
     enclosing scope.

     Args:
         gen_z: index sequence passed to ``vocab.sentence_decode`` with
             ``eos='EOS_Z2'``.

     Returns:
         Tuple ``(degree, degree_input)``: the result of
         ``self.reader.db_search`` and a tensor built from
         ``self.reader._degree_vec_mapping(len(degree))`` with an extra
         leading dimension.
     """
     tokens = self.reader.vocab.sentence_decode(gen_z, eos='EOS_Z2').split()
     # Only the tokens before 'EOS_Z1' (when present) are constraints.
     if 'EOS_Z1' in tokens:
         tokens = tokens[:tokens.index('EOS_Z1')]
     # Underscores inside a token stand for spaces in the search values.
     constraints = [token.replace('_', ' ') for token in tokens]
     degree = self.reader.db_search(constraints)
     degree_vec = self.reader._degree_vec_mapping(len(degree))
     degree_tensor = torch.Tensor(degree_vec).unsqueeze(0)
     return degree, cuda_(Variable(degree_tensor))
Exemplo n.º 5
0
    def interact(self):
        """Run a console chat loop against the model.

        Reads one user utterance per iteration from standard input, feeds it
        to ``self.m`` in 'test' mode and prints the decoded system reply.
        Typing ``END`` leaves the loop. When ``cfg.prev_z_method`` is
        'separate', the belief span generated in this turn is cleaned,
        padded and stored in the keyword arguments for the next turn.
        """
        def z2degree(gen_z):
            # Decode the generated belief span, search the database with its
            # constraint tokens, and return the matches together with the
            # degree tensor derived from the number of matches.
            gen_bspan = self.reader.vocab.sentence_decode(gen_z, eos='EOS_Z2')
            constraint_request = gen_bspan.split()
            if 'EOS_Z1' in constraint_request:
                constraint_request = constraint_request[
                    :constraint_request.index('EOS_Z1')]
            constraints = [ent.replace('_', ' ') for ent in constraint_request]
            degree = self.reader.db_search(constraints)
            degree_input_list = self.reader._degree_vec_mapping(len(degree))
            degree_input = cuda_(
                Variable(torch.Tensor(degree_input_list).unsqueeze(0)))
            return degree, degree_input

        self.m.eval()
        print('Start interaction.')
        kw_ret = {'func': z2degree}
        while True:
            usr = input('usr: ')
            if usr == 'END':
                break
            usr_words = usr.split() + ['EOS_U']
            u_len = np.array([len(usr_words)])
            usr_indices = self.reader.vocab.sentence_encode(usr_words)
            # A single column: one utterance per batch.
            u_input_np = np.array(usr_indices)[:, np.newaxis]
            u_input = cuda_(Variable(torch.from_numpy(u_input_np).long()))
            m_idx, z_idx, _ = self.m(mode='test',
                                     degree_input=None,
                                     z_input=None,
                                     u_input=u_input,
                                     u_input_np=u_input_np,
                                     u_len=u_len,
                                     m_input=None,
                                     m_input_np=None,
                                     m_len=None,
                                     turn_states=None,
                                     **kw_ret)
            # NOTE(review): `**kw_ret` hands the callee a copy of this dict
            # and nothing in this method stores a 'degree' key, so this
            # lookup appears to always yield None -- confirm where 'degree'
            # is supposed to come from.
            degree = kw_ret.get('degree')
            venue = random.sample(degree, 1)[0] if degree else dict()

            resp_tokens = [self.reader.vocab.decode(idx) for idx in m_idx[0]]
            if 'EOS_M' in resp_tokens:
                resp_tokens = resp_tokens[:resp_tokens.index('EOS_M')]

            filled = []
            for word in resp_tokens:
                if 'SLOT' in word:
                    # Drop the last five characters (presumably a '_SLOT'
                    # suffix) to get the venue field name. Placeholders with
                    # no known value, or the value '?', are omitted.
                    word = word[:-5]
                    if word in venue.keys():
                        value = venue[word]
                        if value != '?':
                            filled.append(value.replace(' ', '_'))
                else:
                    filled.append(word)
            sys_utt = ' '.join(filled)
            print('sys:', sys_utt)

            if cfg.prev_z_method == 'separate':
                eob = self.reader.vocab.encode('EOS_Z2')
                if eob in z_idx[0] and z_idx[0].index(eob) != len(
                        z_idx[0]) - 1:
                    # Drop everything generated after the first EOS_Z2.
                    idx = z_idx[0].index(eob)
                    z_idx[0] = z_idx[0][:idx + 1]
                for j, word in enumerate(z_idx[0]):
                    if word >= cfg.vocab_size:
                        z_idx[0][j] = 2  # unk
                prev_z_input_np = pad_sequences(z_idx,
                                                cfg.max_ts,
                                                padding='post',
                                                truncating='pre').transpose(
                                                    (1, 0))
                prev_z_len = np.array([len(seq) for seq in z_idx])
                prev_z_input = cuda_(
                    Variable(torch.from_numpy(prev_z_input_np).long()))
                kw_ret['prev_z_len'] = prev_z_len
                kw_ret['prev_z_input'] = prev_z_input
                kw_ret['prev_z_input_np'] = prev_z_input_np