def _convert_batch_para(self, py_batch, mode, prev_a_py=None):
    """Build padded arrays and tensors for the paraphrase inputs of a batch.

    Args:
        py_batch: mapping read for the keys 'delex_user', 'delex_para',
            'delex_u_len' and 'pre_dial_act'.
        mode: when equal to 'test' and ``prev_a_py`` is non-empty, the
            cleaned ``prev_a_py`` is used as the previous dialogue act;
            in every other case ``py_batch['pre_dial_act']`` is used.
        prev_a_py: optional list of token-id lists. When it is used, each
            entry of this list is REPLACED IN PLACE by its cleaned version,
            so the caller's list is modified.

    Returns:
        Tuple ``(u_input, u_input_np, delex_para_input, delex_para_input_np,
        u_len, prev_dial_act_input)``. The ``*_np`` values are the padded,
        transposed arrays; the others are the tensors built from them.
    """

    def _pad_transposed(seqs, max_len):
        # Pad/truncate to max_len, then swap the two axes
        # (presumably giving (seq_len, batch) -- TODO confirm).
        padded = pad_sequences(seqs, max_len, padding='post',
                               truncating='pre')
        return padded.transpose((1, 0))

    def _as_long_var(array):
        return cuda_(Variable(torch.from_numpy(array).long()))

    u_input_np = _pad_transposed(py_batch['delex_user'], cfg.max_para_len)
    delex_para_input_np = _pad_transposed(py_batch['delex_para'],
                                          cfg.max_para_len)
    u_len = np.array(py_batch['delex_u_len'])
    u_input = _as_long_var(u_input_np)
    delex_para_input = _as_long_var(delex_para_input_np)

    if mode == 'test' and prev_a_py:
        eos_a = self.reader.vocab.encode('EOS_A')
        for row, act in enumerate(prev_a_py):
            if eos_a in act and act.index(eos_a) != len(act) - 1:
                # Keep everything up to and including the first EOS_A.
                kept = act[:act.index(eos_a) + 1]
            else:
                # NOTE(review): this also fires when EOS_A is already the
                # last token, discarding the act content -- confirm that
                # this is intended and not only meant for "no EOS_A".
                kept = [eos_a]
            # Ids outside [0, cfg.vocab_size) are mapped to 2 (unk).
            prev_a_py[row] = [
                2 if (tok >= cfg.vocab_size or tok < 0) else tok
                for tok in kept
            ]
        act_seqs = prev_a_py
    else:
        act_seqs = py_batch['pre_dial_act']

    prev_dial_act_input_np = _pad_transposed(act_seqs, cfg.a_length)
    prev_dial_act_input = _as_long_var(prev_dial_act_input_np)
    return (u_input, u_input_np, delex_para_input, delex_para_input_np,
            u_len, prev_dial_act_input)
def _convert_batch(self, py_batch, prev_z_py=None, mode="train"):
    """Convert one turn batch of python lists into padded arrays and tensors.

    Args:
        py_batch: mapping read for the keys 'domain', 'degree', 'bspan',
            'response', 'm_len' and, depending on ``mode``, either
            'final_user'/'final_u_len' (``mode == "train"``) or
            'user'/'u_len' (any other mode).
        prev_z_py: optional list of token-id lists for the previous belief
            span. How it is used depends on ``cfg.prev_z_method``:
            'concat' prepends it to the user input, 'separate' returns it
            as extra entries of ``kw_ret``.
        mode: selects which user-input keys of ``py_batch`` are read.

    Returns:
        Tuple ``(u_input, u_input_np, z_input, m_input, m_input_np, u_len,
        m_len, degree_input, kw_ret, domain)``. ``kw_ret`` always holds
        'z_input_np' and, in 'separate' mode with a ``prev_z_py``, also
        'prev_z_len', 'prev_z_input' and 'prev_z_input_np'.

    Side effects:
        ``prev_z_py`` is cleaned in place (truncated after the first
        EOS_Z2 in 'separate' mode, out-of-range ids replaced by 2). In
        'concat' mode the user-input and length lists taken from
        ``py_batch`` are also overwritten in place.
    """
    domain = py_batch['domain']
    if mode == "train":
        u_input_py = py_batch['final_user']
        u_len_py = py_batch['final_u_len']
    else:
        u_input_py = py_batch['user']
        u_len_py = py_batch['u_len']

    kw_ret = {}
    if cfg.prev_z_method == 'concat' and prev_z_py is not None:
        eob = self.reader.vocab.encode('EOS_Z2')
        for i, user_tokens in enumerate(u_input_py):
            prev_z = prev_z_py[i]
            if eob in prev_z and prev_z.index(eob) != len(prev_z) - 1:
                # Drop whatever was generated after the first EOS_Z2.
                prefix = prev_z[:prev_z.index(eob) + 1]
            else:
                prefix = prev_z
            # NOTE(review): these assignments write into the lists owned by
            # py_batch, so converting the same batch twice prepends twice.
            u_input_py[i] = prefix + user_tokens
            u_len_py[i] = len(u_input_py[i])
            # NOTE(review): the clamp runs after the concatenation, so the
            # copy stored in u_input_py keeps any out-of-range ids --
            # confirm this is intended.
            for j, word in enumerate(prev_z):
                if word >= cfg.vocab_size or word < 0:
                    prev_z[j] = 2  # unk
    elif cfg.prev_z_method == 'separate' and prev_z_py is not None:
        eob = self.reader.vocab.encode('EOS_Z2')
        for i, prev_z in enumerate(prev_z_py):
            if eob in prev_z and prev_z.index(eob) != len(prev_z) - 1:
                prev_z = prev_z[:prev_z.index(eob) + 1]
                prev_z_py[i] = prev_z
            for j, word in enumerate(prev_z):
                # Same range check as the 'concat' branch: negative ids are
                # as invalid for an embedding lookup as ids >= vocab_size.
                if word >= cfg.vocab_size or word < 0:
                    prev_z[j] = 2  # unk
        prev_z_input_np = pad_sequences(
            prev_z_py, cfg.max_ts, padding='post',
            truncating='pre').transpose((1, 0))
        prev_z_len = np.array([len(seq) for seq in prev_z_py])
        prev_z_input = cuda_(
            Variable(torch.from_numpy(prev_z_input_np).long()))
        kw_ret['prev_z_len'] = prev_z_len
        kw_ret['prev_z_input'] = prev_z_input
        kw_ret['prev_z_input_np'] = prev_z_input_np

    degree_input_np = np.array(py_batch['degree'])
    u_input_np = pad_sequences(
        u_input_py, cfg.max_ts, padding='post',
        truncating='pre').transpose((1, 0))
    # bspan is padded without a maximum length.
    z_input_np = pad_sequences(
        py_batch['bspan'], padding='post').transpose((1, 0))
    # Responses are cut at the end ('post'), user input at the start ('pre').
    m_input_np = pad_sequences(
        py_batch['response'], cfg.max_ts, padding='post',
        truncating='post').transpose((1, 0))
    u_len = np.array(u_len_py)
    m_len = np.array(py_batch['m_len'])

    degree_input = cuda_(
        Variable(torch.from_numpy(degree_input_np).float()))
    u_input = cuda_(Variable(torch.from_numpy(u_input_np).long()))
    z_input = cuda_(Variable(torch.from_numpy(z_input_np).long()))
    m_input = cuda_(Variable(torch.from_numpy(m_input_np).long()))

    kw_ret['z_input_np'] = z_input_np
    return (u_input, u_input_np, z_input, m_input, m_input_np, u_len,
            m_len, degree_input, kw_ret, domain)
def _convert_input(self, encoded_input):
    """Turn a single encoded utterance into model inputs.

    Args:
        encoded_input: sequence of token ids for one utterance. It is
            wrapped in a one-element list before padding, so the result
            describes a batch of size one.

    Returns:
        Tuple ``(u_input, u_input_np, u_len, degree_input)``:
        the long tensor and the padded, transposed array of the utterance,
        a one-element array holding ``len(encoded_input)``, and the float
        tensor of the degree vector.
    """
    padded = pad_sequences([encoded_input], cfg.max_ts, padding='post',
                           truncating='pre')
    u_input_np = padded.transpose((1, 0))
    u_input = cuda_(Variable(torch.from_numpy(u_input_np).long()))

    # The length is taken from the raw input, not from the padded array.
    u_len = np.array([len(encoded_input)])

    # The degree vector is always requested for the fixed argument 1
    # (presumably "one DB match" -- TODO confirm against the reader).
    degree_input_np = np.array([self.reader._degree_vec_mapping(1)])
    degree_tensor = torch.from_numpy(degree_input_np).float()
    degree_input = cuda_(Variable(degree_tensor))

    return u_input, u_input_np, u_len, degree_input
def z2degree(gen_z):
    """Decode a generated belief span and look up matching DB entries.

    NOTE(review): ``self`` is not a parameter; it is read as a free
    variable, so this function only works where a ``self`` is in the
    enclosing scope (e.g. defined inside a method) -- confirm placement.

    Args:
        gen_z: encoded belief span, decoded up to 'EOS_Z2'.

    Returns:
        Tuple ``(degree, degree_input)``: the result of
        ``self.reader.db_search`` for the decoded constraints, and a tensor
        built from the degree vector for ``len(degree)`` with a leading
        axis of size one added.
    """
    tokens = self.reader.vocab.sentence_decode(gen_z, eos='EOS_Z2').split()
    # Only the tokens before 'EOS_Z1' are treated as constraints; without
    # that marker every token is.
    if 'EOS_Z1' in tokens:
        tokens = tokens[:tokens.index('EOS_Z1')]
    # Multi-word values are encoded with underscores; restore the spaces.
    constraints = [tok.replace('_', ' ') for tok in tokens]

    degree = self.reader.db_search(constraints)
    degree_vec = self.reader._degree_vec_mapping(len(degree))
    degree_input = cuda_(Variable(torch.Tensor(degree_vec).unsqueeze(0)))
    return degree, degree_input
def interact(self):
    """Run a console chat loop against the model until the user types END.

    Each turn reads one line from stdin, appends 'EOS_U', encodes it and
    calls ``self.m`` in 'test' mode. The decoded response is printed with
    every ``*SLOT`` placeholder replaced by the matching value of one
    randomly chosen DB entry; placeholders without a usable value are
    dropped. When ``cfg.prev_z_method == 'separate'`` the generated belief
    span is cleaned and passed to the next turn through ``kw_ret``.

    Returns:
        None. The loop ends when the input line equals 'END'.
    """
    # Holds the DB entries found by the most recent z2degree call. The
    # model receives kw_ret as **kwargs (a copy), so it cannot hand the
    # search result back through kw_ret itself.
    last_search = {'degree': None}

    def z2degree(gen_z):
        """Decode a belief span, query the DB, return (entries, tensor)."""
        gen_bspan = self.reader.vocab.sentence_decode(gen_z, eos='EOS_Z2')
        constraint_request = gen_bspan.split()
        # Only the tokens before 'EOS_Z1' are constraints.
        if 'EOS_Z1' in constraint_request:
            cut = constraint_request.index('EOS_Z1')
            constraint_request = constraint_request[:cut]
        constraints = [ent.replace('_', ' ') for ent in constraint_request]
        degree = self.reader.db_search(constraints)
        degree_input_list = self.reader._degree_vec_mapping(len(degree))
        degree_input = cuda_(
            Variable(torch.Tensor(degree_input_list).unsqueeze(0)))
        last_search['degree'] = degree
        return degree, degree_input

    self.m.eval()
    print('Start interaction.')
    kw_ret = dict({'func': z2degree})
    while True:
        usr = input('usr: ')
        if usr == 'END':
            break
        usr_words = usr.split() + ['EOS_U']
        u_len = np.array([len(usr_words)])
        usr_indices = self.reader.vocab.sentence_encode(usr_words)
        # Add a trailing axis of size one (a single-utterance batch).
        u_input_np = np.array(usr_indices)[:, np.newaxis]
        u_input = cuda_(Variable(torch.from_numpy(u_input_np).long()))

        # Forget the previous turn's search so a turn in which the model
        # never calls z2degree does not reuse stale entries.
        last_search['degree'] = None
        m_idx, z_idx, _ = self.m(mode='test', degree_input=None,
                                 z_input=None, u_input=u_input,
                                 u_input_np=u_input_np, u_len=u_len,
                                 m_input=None, m_input_np=None, m_len=None,
                                 turn_states=None, **kw_ret)
        degree = last_search['degree']
        venue = random.sample(degree, 1)[0] if degree else dict()

        resp_tokens = [self.reader.vocab.decode(tok) for tok in m_idx[0]]
        if 'EOS_M' in resp_tokens:
            resp_tokens = resp_tokens[:resp_tokens.index('EOS_M')]

        filled = []
        for word in resp_tokens:
            if 'SLOT' in word:
                # Strip the 5-character suffix to get the slot name.
                slot = word[:-5]
                if slot in venue.keys():
                    value = venue[slot]
                    if value != '?':
                        filled.append(value.replace(' ', '_'))
            else:
                filled.append(word)
        sys_utt = ' '.join(filled)
        print('sys:', sys_utt)

        if cfg.prev_z_method == 'separate':
            eob = self.reader.vocab.encode('EOS_Z2')
            if eob in z_idx[0] and z_idx[0].index(eob) != len(z_idx[0]) - 1:
                # Drop whatever was generated after the first EOS_Z2.
                z_idx[0] = z_idx[0][:z_idx[0].index(eob) + 1]
            for j, word in enumerate(z_idx[0]):
                if word >= cfg.vocab_size or word < 0:
                    z_idx[0][j] = 2  # unk
            prev_z_input_np = pad_sequences(
                z_idx, cfg.max_ts, padding='post',
                truncating='pre').transpose((1, 0))
            prev_z_len = np.array([len(seq) for seq in z_idx])
            prev_z_input = cuda_(
                Variable(torch.from_numpy(prev_z_input_np).long()))
            kw_ret['prev_z_len'] = prev_z_len
            kw_ret['prev_z_input'] = prev_z_input
            kw_ret['prev_z_input_np'] = prev_z_input_np