import random

import numpy as np
import torch
from torch.autograd import Variable

# Assumed project-local helpers from the surrounding Sequicity-style codebase:
# cfg (config module), cuda_ (move-to-GPU wrapper), pad_sequences, word_tokenize
# (typically nltk's), and the reader/vocab/db objects referenced below.


def _convert_batch(self, py_batch, prev_z_py=None):
    u_input_py = py_batch['user']
    u_len_py = py_batch['u_len']
    kw_ret = {}
    if cfg.prev_z_method == 'concat' and prev_z_py is not None:
        # prepend the previous turn's belief span (up to EOS_Z2) to the user input
        for i in range(len(u_input_py)):
            eob = self.reader.vocab.encode('EOS_Z2')
            if eob in prev_z_py[i] and prev_z_py[i].index(eob) != len(prev_z_py[i]) - 1:
                idx = prev_z_py[i].index(eob)
                u_input_py[i] = prev_z_py[i][:idx + 1] + u_input_py[i]
            else:
                u_input_py[i] = prev_z_py[i] + u_input_py[i]
            u_len_py[i] = len(u_input_py[i])
            for j, word in enumerate(prev_z_py[i]):
                if word >= cfg.vocab_size:
                    prev_z_py[i][j] = 2  # unk
    elif cfg.prev_z_method == 'separate' and prev_z_py is not None:
        # keep the previous belief span as a separate input, truncated at EOS_Z2
        for i in range(len(prev_z_py)):
            eob = self.reader.vocab.encode('EOS_Z2')
            if eob in prev_z_py[i] and prev_z_py[i].index(eob) != len(prev_z_py[i]) - 1:
                idx = prev_z_py[i].index(eob)
                prev_z_py[i] = prev_z_py[i][:idx + 1]
            for j, word in enumerate(prev_z_py[i]):
                if word >= cfg.vocab_size:
                    prev_z_py[i][j] = 2  # unk
        prev_z_input_np = pad_sequences(prev_z_py, cfg.max_ts, padding='post',
                                        truncating='pre').transpose((1, 0))
        prev_z_len = np.array([len(_) for _ in prev_z_py])
        prev_z_input = cuda_(Variable(torch.from_numpy(prev_z_input_np).long()))
        kw_ret['prev_z_len'] = prev_z_len
        kw_ret['prev_z_input'] = prev_z_input
        kw_ret['prev_z_input_np'] = prev_z_input_np

    degree_input_np = np.array(py_batch['degree'])
    u_input_np = pad_sequences(u_input_py, cfg.max_ts, padding='post',
                               truncating='pre').transpose((1, 0))
    z_input_np = pad_sequences(py_batch['bspan'], padding='post').transpose((1, 0))
    m_input_np = pad_sequences(py_batch['response'], cfg.max_ts, padding='post',
                               truncating='post').transpose((1, 0))
    u_len = np.array(u_len_py)
    m_len = np.array(py_batch['m_len'])
    degree_input = cuda_(Variable(torch.from_numpy(degree_input_np).float()))
    u_input = cuda_(Variable(torch.from_numpy(u_input_np).long()))
    z_input = cuda_(Variable(torch.from_numpy(z_input_np).long()))
    m_input = cuda_(Variable(torch.from_numpy(m_input_np).long()))
    kw_ret['z_input_np'] = z_input_np
    return u_input, u_input_np, z_input, m_input, m_input_np, u_len, m_len, \
        degree_input, kw_ret
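# --- Illustrative sketch (not part of the model) ---------------------------------
# How the pad-and-transpose pattern in _convert_batch produces the time-major
# (T, B) int arrays the encoder consumes. `toy_pad` is a stand-in for the project's
# pad_sequences helper; the real one may differ in signature and defaults.
def toy_pad(seqs, maxlen, padding='post', truncating='pre'):
    out = np.zeros((len(seqs), maxlen), dtype=np.int64)
    for i, s in enumerate(seqs):
        s = s[-maxlen:] if truncating == 'pre' else s[:maxlen]
        if padding == 'post':
            out[i, :len(s)] = s
        else:
            out[i, -len(s):] = s
    return out

_demo_batch = [[5, 6, 7], [8, 9]]                        # two utterances as token ids
_u_input_np = toy_pad(_demo_batch, maxlen=4).transpose((1, 0))
# _u_input_np.shape == (4, 2): one time step per row, one dialogue per column.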
def predict(self, usr):
    print('usr:', usr)
    usr = word_tokenize(usr.lower())
    usr_words = usr + ['EOS_U']
    u_len = np.array([len(usr_words)])
    usr_indices = self.m.reader.vocab.sentence_encode(usr_words)
    u_input_np = np.array(usr_indices)[:, np.newaxis]
    u_input = cuda_(Variable(torch.from_numpy(u_input_np).long()))
    m_idx, z_idx, degree = self.m.m(mode='test', degree_input=None, z_input=None,
                                    u_input=u_input, u_input_np=u_input_np, u_len=u_len,
                                    m_input=None, m_input_np=None, m_len=None,
                                    turn_states=None, **self.kw_ret)
    venue = random.sample(degree, 1)[0] if degree else dict()
    l = [self.m.reader.vocab.decode(_) for _ in m_idx[0]]
    if 'EOS_M' in l:
        l = l[:l.index('EOS_M')]
    l_origin = []
    for word in l:
        if 'SLOT' in word:
            word = word[:-5]
            if word in venue.keys():
                value = venue[word]
                if value != '?':
                    l_origin.append(value)
        elif word.endswith('reference]'):
            if 'ref' in venue:
                l_origin.append(venue['ref'])
        else:
            l_origin.append(word)
    sys = ' '.join(l_origin)
    sys = denormalize(sys)
    print('sys:', sys)
    if cfg.prev_z_method == 'separate':
        eob = self.m.reader.vocab.encode('EOS_Z2')
        if eob in z_idx[0] and z_idx[0].index(eob) != len(z_idx[0]) - 1:
            idx = z_idx[0].index(eob)
            z_idx[0] = z_idx[0][:idx + 1]
        for j, word in enumerate(z_idx[0]):
            if word >= cfg.vocab_size:
                z_idx[0][j] = 2  # unk
        prev_z_input_np = pad_sequences(z_idx, cfg.max_ts, padding='post',
                                        truncating='pre').transpose((1, 0))
        prev_z_len = np.array([len(_) for _ in z_idx])
        prev_z_input = cuda_(Variable(torch.from_numpy(prev_z_input_np).long()))
        self.kw_ret['prev_z_len'] = prev_z_len
        self.kw_ret['prev_z_input'] = prev_z_input
        self.kw_ret['prev_z_input_np'] = prev_z_input_np
    return sys
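# --- Illustrative sketch of the lexicalisation loop in predict() -----------------
# The '<slot>_SLOT' token format (stripped via word[:-5]) is inferred from the code
# above; `_demo_venue` is a toy stand-in for a DB record sampled from `degree`.
_demo_venue = {'name': 'pizza hut', 'area': 'centre', 'ref': 'ABC123'}
_demo_delex = ['name_SLOT', 'serves', 'food', 'in', 'the', 'area_SLOT', 'EOS_M']
_demo_out = []
for _w in _demo_delex[:_demo_delex.index('EOS_M')]:
    if 'SLOT' in _w:
        _key = _w[:-5]                                   # strip the '_SLOT' suffix
        if _key in _demo_venue and _demo_venue[_key] != '?':
            _demo_out.append(_demo_venue[_key])
    else:
        _demo_out.append(_w)
# ' '.join(_demo_out) -> 'pizza hut serves food in the centre'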
def greedy_decode(self, pz_dec_outs, u_enc_out, m_tm1, u_input_np, last_hidden,
                  degree_input, bspan_index):
    decoded = []
    bspan_index_np = pad_sequences(bspan_index).transpose((1, 0))
    for t in range(self.max_ts):
        proba, last_hidden, _ = self.m_decoder(pz_dec_outs, u_enc_out, u_input_np, m_tm1,
                                               degree_input, last_hidden, bspan_index_np)
        proba = torch.cat((proba[:, :2], proba[:, 3:]), 1)  # drop column 2 (<unk>) so it is never emitted
        mt_proba, mt_index = torch.topk(proba, 1)  # [B, 1]
        mt_index.add_(mt_index.ge(2).long())  # shift positions >= 2 back to original vocabulary ids
        mt_index = mt_index.data.view(-1)
        decoded.append(mt_index.clone())
        for i in range(mt_index.size(0)):
            if mt_index[i] >= cfg.vocab_size:
                mt_index[i] = 2  # unk
        m_tm1 = cuda_(Variable(mt_index).view(1, -1))
    decoded = torch.stack(decoded, dim=0).transpose(0, 1)
    decoded = list(decoded)
    return [list(_) for _ in decoded]
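# --- Illustrative sketch: the <unk> masking trick in greedy_decode ---------------
# Column 2 (<unk>) is removed from the distribution before topk so the decoder can
# never select <unk>; add_(ge(2)) then maps the surviving positions back to the
# original vocabulary ids. Small self-contained check:
_proba = torch.tensor([[0.1, 0.2, 0.9, 0.3, 0.4]])      # id 2 (<unk>) has the top score
_masked = torch.cat((_proba[:, :2], _proba[:, 3:]), 1)  # drop column 2
_, _idx = torch.topk(_masked, 1)                        # best remaining position: 3
_idx.add_(_idx.ge(2).long())                            # positions >= 2 map back to id + 1
# _idx is tensor([[4]]): the highest-scoring non-<unk> vocabulary id.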
def predict(self, usr, kw_ret):
    def z2degree(gen_z):
        gen_bspan = self.reader.vocab.sentence_decode(gen_z, eos='EOS_Z2')
        constraint_request = gen_bspan.split()
        constraints = constraint_request[:constraint_request.index('EOS_Z1')] \
            if 'EOS_Z1' in constraint_request else constraint_request
        for j, ent in enumerate(constraints):
            constraints[j] = ent.replace('_', ' ')
        degree = self.reader.db_search(constraints)
        degree_input_list = self.reader._degree_vec_mapping(len(degree))
        degree_input = cuda_(Variable(torch.Tensor(degree_input_list).unsqueeze(0)))
        return degree, degree_input

    self.m.eval()
    kw_ret['func'] = z2degree
    if 'prev_z_input_np' in kw_ret:
        kw_ret['prev_z_len'] = np.array(kw_ret['prev_z_len'])
        kw_ret['prev_z_input_np'] = np.array(kw_ret['prev_z_input_np'])
        kw_ret['prev_z_input'] = cuda_(Variable(torch.Tensor(kw_ret['prev_z_input_np']).long()))

    usr = word_tokenize(usr.lower())
    usr_words = usr + ['EOS_U']
    u_len = np.array([len(usr_words)])
    usr_indices = self.reader.vocab.sentence_encode(usr_words)
    u_input_np = np.array(usr_indices)[:, np.newaxis]
    u_input = cuda_(Variable(torch.from_numpy(u_input_np).long()))
    m_idx, z_idx, degree = self.m(mode='test', degree_input=None, z_input=None,
                                  u_input=u_input, u_input_np=u_input_np, u_len=u_len,
                                  m_input=None, m_input_np=None, m_len=None,
                                  turn_states=None, **kw_ret)
    venue = random.sample(degree, 1)[0] if degree else dict()
    l = [self.reader.vocab.decode(_) for _ in m_idx[0]]
    if 'EOS_M' in l:
        l = l[:l.index('EOS_M')]
    l_origin = []
    for word in l:
        if 'SLOT' in word:
            word = word[:-5]
            if word in venue.keys():
                value = venue[word]
                if value != '?':
                    l_origin.append(value.replace(' ', '_'))
        else:
            l_origin.append(word)
    sys = ' '.join(l_origin)
    kw_ret['sys'] = sys
    if cfg.prev_z_method == 'separate':
        eob = self.reader.vocab.encode('EOS_Z2')
        if eob in z_idx[0] and z_idx[0].index(eob) != len(z_idx[0]) - 1:
            idx = z_idx[0].index(eob)
            z_idx[0] = z_idx[0][:idx + 1]
        for j, word in enumerate(z_idx[0]):
            if word >= cfg.vocab_size:
                z_idx[0][j] = 2  # unk
        prev_z_input_np = pad_sequences(z_idx, cfg.max_ts, padding='post',
                                        truncating='pre').transpose((1, 0))
        prev_z_len = np.array([len(_) for _ in z_idx])
        kw_ret['prev_z_len'] = prev_z_len.tolist()
        kw_ret['prev_z_input_np'] = prev_z_input_np.tolist()
    if 'prev_z_input' in kw_ret:
        del kw_ret['prev_z_input']
    del kw_ret['func']
    return kw_ret
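# --- Hypothetical usage sketch for the stateless predict(usr, kw_ret) above ------
# Assuming cfg.prev_z_method == 'separate' (the branch that .tolist()s the arrays),
# predict() leaves only plain Python values in kw_ret, so a caller such as a web
# endpoint can round-trip the turn state as JSON. `model` is an assumed handle to
# the object that owns predict(); it is not defined in this file.
import json

def _demo_turn(model, user_utterance, state_json='{}'):
    """Run one turn and return (system response, serialised state for the next turn)."""
    kw_ret = json.loads(state_json)          # first turn: '{}' -> empty state
    kw_ret = model.predict(user_utterance, kw_ret)
    return kw_ret['sys'], json.dumps(kw_ret)  # prev_z_* survive as plain lists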
def interact(self):
    def z2degree(gen_z):
        gen_bspan = self.reader.vocab.sentence_decode(gen_z, eos='EOS_Z2')
        constraint_request = gen_bspan.split()
        constraints = constraint_request[:constraint_request.index('EOS_Z1')] \
            if 'EOS_Z1' in constraint_request else constraint_request
        for j, ent in enumerate(constraints):
            constraints[j] = ent.replace('_', ' ')
        degree = self.reader.db_search(constraints)
        degree_input_list = self.reader._degree_vec_mapping(len(degree))
        degree_input = cuda_(Variable(torch.Tensor(degree_input_list).unsqueeze(0)))
        return degree, degree_input

    def denormalize(uttr):
        uttr = uttr.replace(' -s', 's')
        uttr = uttr.replace(' -ly', 'ly')
        uttr = uttr.replace(' -er', 'er')
        return uttr

    self.m.eval()
    print('Start interaction.')
    kw_ret = dict({'func': z2degree})
    while True:
        usr = input('usr: ')
        if usr == 'END':
            break
        if usr == 'RESET':
            kw_ret = dict({'func': z2degree})
            continue
        usr = word_tokenize(usr.lower())
        usr_words = usr + ['EOS_U']
        u_len = np.array([len(usr_words)])
        usr_indices = self.reader.vocab.sentence_encode(usr_words)
        u_input_np = np.array(usr_indices)[:, np.newaxis]
        u_input = cuda_(Variable(torch.from_numpy(u_input_np).long()))
        m_idx, z_idx, degree = self.m(mode='test', degree_input=None, z_input=None,
                                      u_input=u_input, u_input_np=u_input_np, u_len=u_len,
                                      m_input=None, m_input_np=None, m_len=None,
                                      turn_states=None, **kw_ret)
        venue = random.sample(degree, 1)[0] if degree else dict()
        l = [self.reader.vocab.decode(_) for _ in m_idx[0]]
        if 'EOS_M' in l:
            l = l[:l.index('EOS_M')]
        l_origin = []
        for word in l:
            if 'SLOT' in word:
                word = word[:-5]
                if word in venue.keys():
                    value = venue[word]
                    if value != '?':
                        l_origin.append(value)
            else:
                l_origin.append(word)
        sys = ' '.join(l_origin)
        sys = denormalize(sys)
        print('sys:', sys)
        if cfg.prev_z_method == 'separate':
            eob = self.reader.vocab.encode('EOS_Z2')
            if eob in z_idx[0] and z_idx[0].index(eob) != len(z_idx[0]) - 1:
                idx = z_idx[0].index(eob)
                z_idx[0] = z_idx[0][:idx + 1]
            for j, word in enumerate(z_idx[0]):
                if word >= cfg.vocab_size:
                    z_idx[0][j] = 2  # unk
            prev_z_input_np = pad_sequences(z_idx, cfg.max_ts, padding='post',
                                            truncating='pre').transpose((1, 0))
            prev_z_len = np.array([len(_) for _ in z_idx])
            prev_z_input = cuda_(Variable(torch.from_numpy(prev_z_input_np).long()))
            kw_ret['prev_z_len'] = prev_z_len
            kw_ret['prev_z_input'] = prev_z_input
            kw_ret['prev_z_input_np'] = prev_z_input_np
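# --- Quick check of the denormalize() rules used in interact() -------------------
# The decoder emits detached suffix tokens (' -s', ' -ly', ' -er'); denormalize()
# glues them back onto the preceding word before the response is printed.
def _denormalize_demo(uttr):
    for frag, glue in ((' -s', 's'), (' -ly', 'ly'), (' -er', 'er')):
        uttr = uttr.replace(frag, glue)
    return uttr

# _denormalize_demo('the restaurant -s are cheap -er than average')
# -> 'the restaurants are cheaper than average'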