def search(self, seq, n_samples, ignore_unk=False, minlen=1):
    c = self.comp_repr(seq)[0]
    states = map(lambda x: x[None, :], self.comp_init_states(c))
    dim = states[0].shape[1]

    num_levels = len(states)

    fin_trans = []
    fin_costs = []

    trans = [[]]
    costs = [0.0]

    for k in range(3 * len(seq)):
        if n_samples == 0:
            break

        # Compute probabilities of the next words for
        # all the elements of the beam.
        beam_size = len(trans)
        last_words = (numpy.array(map(lambda t: t[-1], trans))
                      if k > 0
                      else numpy.zeros(beam_size, dtype="int64"))
        log_probs = numpy.log(self.comp_next_probs(c, k, last_words, *states)[0])

        # Adjust log probs according to search restrictions
        if ignore_unk:
            log_probs[:, self.unk_id] = -numpy.inf
        # TODO: report me in the paper!!!
        if k < minlen:
            log_probs[:, self.eos_id] = -numpy.inf

        # Find the best options by calling argpartition on the flattened array
        next_costs = numpy.array(costs)[:, None] - log_probs
        flat_next_costs = next_costs.flatten()
        best_costs_indices = argpartition(flat_next_costs, n_samples)[:n_samples]

        # Decipher the flattened indices
        voc_size = log_probs.shape[1]
        trans_indices = best_costs_indices / voc_size
        word_indices = best_costs_indices % voc_size
        costs = flat_next_costs[best_costs_indices]

        # Form a beam for the next iteration
        new_trans = [[]] * n_samples
        new_costs = numpy.zeros(n_samples)
        new_states = [numpy.zeros((n_samples, dim), dtype="float32")
                      for level in range(num_levels)]
        inputs = numpy.zeros(n_samples, dtype="int64")
        for i, (orig_idx, next_word, next_cost) in enumerate(
                zip(trans_indices, word_indices, costs)):
            new_trans[i] = trans[orig_idx] + [next_word]
            new_costs[i] = next_cost
            for level in range(num_levels):
                new_states[level][i] = states[level][orig_idx]
            inputs[i] = next_word
        new_states = self.comp_next_states(c, k, inputs, *new_states)

        # Filter the sequences that end with the end-of-sequence character
        trans = []
        costs = []
        indices = []
        for i in range(n_samples):
            if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                trans.append(new_trans[i])
                costs.append(new_costs[i])
                indices.append(i)
            else:
                n_samples -= 1
                fin_trans.append(new_trans[i])
                fin_costs.append(new_costs[i])
        states = map(lambda x: x[indices], new_states)

    # Dirty tricks to obtain any translation
    if not len(fin_trans):
        if ignore_unk:
            logger.warning("Did not manage without UNK")
            return self.search(seq, n_samples, False, minlen)
        elif n_samples < 500:
            logger.warning("Still no translations: try beam size {}".format(n_samples * 2))
            return self.search(seq, n_samples * 2, False, minlen)
        else:
            logger.error("Translation failed")

    fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
    fin_costs = numpy.array(sorted(fin_costs))
    return fin_trans, fin_costs
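# A minimal, self-contained sketch of the flattened-argpartition trick used in
# the beam-expansion step above. All numbers are made up: 2 live hypotheses,
# a 5-word vocabulary, and a beam of 3.

import numpy

costs = numpy.array([1.2, 0.7])                   # accumulated cost per hypothesis
log_probs = numpy.log(numpy.array([
    [0.1, 0.4, 0.2, 0.2, 0.1],                    # next-word distribution, hyp 0
    [0.3, 0.1, 0.1, 0.4, 0.1],                    # next-word distribution, hyp 1
]))

n_samples = 3
next_costs = costs[:, None] - log_probs           # cost of every (hyp, word) pair
flat = next_costs.flatten()
best = numpy.argpartition(flat, n_samples)[:n_samples]

voc_size = log_probs.shape[1]
trans_indices = best // voc_size                  # which hypothesis each survivor extends
word_indices = best % voc_size                    # which word it appends
print zip(trans_indices, word_indices, flat[best])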
def search(self, seq, n_samples, ignore_unk=False, minlen=1,
           compute_alignment=False, have_source=False):
    # Split the concatenated input into one sub-sequence per system,
    # replacing each split marker with the source end-of-sequence symbol.
    x = []
    last_split = -1
    for i in xrange(len(seq)):
        if seq[i] == self.split_id:
            tmp = copy.deepcopy(seq[last_split + 1:i + 1])
            x.append(tmp)
            last_split = i
    assert self.num_systems == len(x)
    for i in xrange(self.num_systems):
        x[i][-1] = self.source_eos_id

    c = self.comp_repr(*x)
    states = map(lambda s: s[None, :], self.comp_init_states(*c))
    dim = states[0].shape[1]

    num_levels = len(states)

    fin_trans = []
    fin_costs = []

    trans = [[]]
    costs = [0.0]

    # The minimum length is derived from the (average) input length.
    if have_source:
        minlen = (len(x[0]) - 1) / 2
    else:
        minlen = (len(seq) - self.num_systems) / self.num_systems / 2

    if compute_alignment:
        fin_aligns = []
        aligns = [[]]

    for k in range(6 * minlen):
        if n_samples == 0:
            break

        # Compute probabilities of the next words for
        # all the elements of the beam.
        beam_size = len(trans)
        last_words = (numpy.array(map(lambda t: t[-1], trans))
                      if k > 0
                      else numpy.zeros(beam_size, dtype="int64"))
        if compute_alignment:
            align = self.comp_align(k, last_words, *(states + c))
        log_probs = numpy.log(self.comp_next_probs(k, last_words, *(states + c))[0])

        # Adjust log probs according to search restrictions
        if ignore_unk:
            log_probs[:, self.unk_id] = -numpy.inf
        # TODO: report me in the paper!!!
        if k < minlen:
            log_probs[:, self.eos_id] = -numpy.inf

        # Find the best options by calling argpartition on the flattened array
        next_costs = numpy.array(costs)[:, None] - log_probs
        flat_next_costs = next_costs.flatten()
        best_costs_indices = argpartition(flat_next_costs, n_samples)[:n_samples]

        # Decipher the flattened indices
        voc_size = log_probs.shape[1]
        trans_indices = best_costs_indices / voc_size
        word_indices = best_costs_indices % voc_size
        costs = flat_next_costs[best_costs_indices]

        # Form a beam for the next iteration
        new_trans = [[]] * n_samples
        new_costs = numpy.zeros(n_samples)
        new_states = [numpy.zeros((n_samples, dim), dtype="float32")
                      for level in range(num_levels)]
        if compute_alignment:
            new_aligns = [[]] * n_samples
        inputs = numpy.zeros(n_samples, dtype="int64")
        for i, (orig_idx, next_word, next_cost) in enumerate(
                zip(trans_indices, word_indices, costs)):
            new_trans[i] = trans[orig_idx] + [next_word]
            new_costs[i] = next_cost
            if compute_alignment:
                new_aligns[i] = aligns[orig_idx] + [align[:, orig_idx]]
            for level in range(num_levels):
                new_states[level][i] = states[level][orig_idx]
            inputs[i] = next_word
        new_states = self.comp_next_states(k, inputs, *(new_states + c))

        # Filter the sequences that end with the end-of-sequence character
        trans = []
        costs = []
        aligns = []
        indices = []
        for i in range(n_samples):
            if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                trans.append(new_trans[i])
                costs.append(new_costs[i])
                if compute_alignment:
                    aligns.append(new_aligns[i])
                indices.append(i)
            else:
                n_samples -= 1
                fin_trans.append(new_trans[i])
                fin_costs.append(new_costs[i])
                if compute_alignment:
                    fin_aligns.append(new_aligns[i])
        states = map(lambda s: s[indices], new_states)

    # Dirty tricks to obtain any translation
    if not len(fin_trans):
        if ignore_unk:
            logger.warning("Did not manage without UNK")
            # Pass the extra flags along so retries return the same tuple shape.
            return self.search(seq, n_samples, False, minlen,
                               compute_alignment, have_source)
        elif n_samples < 100:
            logger.warning("Still no translations: try beam size {}".format(n_samples * 2))
            return self.search(seq, n_samples * 2, False, minlen,
                               compute_alignment, have_source)
        else:
            logger.error("Translation failed")

    fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
    if compute_alignment:
        fin_aligns = numpy.array(fin_aligns)[numpy.argsort(fin_costs)]
    fin_costs = numpy.array(sorted(fin_costs))
    if compute_alignment:
        return fin_trans, fin_aligns, fin_costs
    else:
        return fin_trans, fin_costs
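# Toy illustration of the input-splitting loop above. Token ids and the
# split/EOS markers are made up; in the real class they come from the state.

split_id, source_eos_id = 99, 0
seq = [5, 8, 99, 7, 3, 2, 99]           # two system outputs, each ending in split_id

x, last_split = [], -1
for i in xrange(len(seq)):
    if seq[i] == split_id:
        x.append(list(seq[last_split + 1:i + 1]))
        last_split = i
for sub in x:
    sub[-1] = source_eos_id             # replace the split marker with EOS

print x                                 # [[5, 8, 0], [7, 3, 2, 0]]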
def search(self, seq, n_samples, eos_id, unk_id,
           ignore_unk=False, minlen=1, final=False):
    num_models = len(self.enc_decs)
    c = []
    for i in xrange(num_models):
        c.append(self.comp_repr[i](seq)[0])
    states = []
    for i in xrange(num_models):
        states.append(map(lambda x: x[None, :], self.comp_init_states[i](c[i])))
    dim = states[0][0].shape[1]

    num_levels = len(states[0])

    fin_trans = []
    fin_costs = []

    trans = [[]]
    costs = [0.0]

    for k in range(3 * len(seq)):
        if n_samples == 0:
            break

        # Compute probabilities of the next words for
        # all the elements of the beam.
        beam_size = len(trans)
        last_words = (numpy.array(map(lambda t: t[-1], trans))
                      if k > 0
                      else numpy.zeros(beam_size, dtype="int64"))

        # Average the models' log-probabilities, i.e. score each word by the
        # geometric mean of the predicted distributions.
        log_probs = sum(numpy.log(self.comp_next_probs[i](c[i], k, last_words, *states[i])[0])
                        for i in xrange(num_models)) / num_models

        # Adjust log probs according to search restrictions
        if ignore_unk:
            log_probs[:, unk_id] = -numpy.inf
        # TODO: report me in the paper!!!
        if k < minlen:
            log_probs[:, eos_id] = -numpy.inf

        # Find the best options by calling argpartition on the flattened array
        next_costs = numpy.array(costs)[:, None] - log_probs
        flat_next_costs = next_costs.flatten()
        best_costs_indices = argpartition(flat_next_costs, n_samples)[:n_samples]

        # Decipher the flattened indices
        voc_size = log_probs.shape[1]
        trans_indices = best_costs_indices / voc_size
        word_indices = best_costs_indices % voc_size
        costs = flat_next_costs[best_costs_indices]

        # Form a beam for the next iteration
        new_trans = [[]] * n_samples
        new_costs = numpy.zeros(n_samples)
        new_states = []
        for i in xrange(num_models):
            new_states.append([numpy.zeros((n_samples, dim), dtype="float32")
                               for level in range(num_levels)])
        inputs = numpy.zeros(n_samples, dtype="int64")
        for i, (orig_idx, next_word, next_cost) in enumerate(
                zip(trans_indices, word_indices, costs)):
            new_trans[i] = trans[orig_idx] + [next_word]
            new_costs[i] = next_cost
            for level in range(num_levels):
                for j in xrange(num_models):
                    new_states[j][level][i] = states[j][level][orig_idx]
            inputs[i] = next_word
        for i in xrange(num_models):
            new_states[i] = self.comp_next_states[i](c[i], k, inputs, *new_states[i])

        # Filter the sequences that end with the end-of-sequence character
        trans = []
        costs = []
        indices = []
        for i in range(n_samples):
            if new_trans[i][-1] != eos_id:
                trans.append(new_trans[i])
                costs.append(new_costs[i])
                indices.append(i)
            else:
                n_samples -= 1
                fin_trans.append(new_trans[i])
                fin_costs.append(new_costs[i])
        for i in xrange(num_models):
            states[i] = map(lambda x: x[indices], new_states[i])

    # Dirty tricks to obtain any translation
    if not len(fin_trans):
        if ignore_unk:
            logger.warning("Did not manage without UNK")
            return self.search(seq, n_samples, eos_id=eos_id, unk_id=unk_id,
                               ignore_unk=False, minlen=minlen, final=final)
        elif not final:
            logger.warning("No appropriate translations: using larger vocabulary")
            raise RuntimeError
        else:
            logger.warning("No appropriate translation: return empty translation")
            fin_trans = [[]]
            fin_costs = [0.0]

    fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
    fin_costs = numpy.array(sorted(fin_costs))
    return fin_trans, fin_costs
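# A minimal sketch of the ensemble averaging in the loop above: averaging
# log-probabilities scores each word by the geometric mean of the models'
# probabilities. The two distributions below are made up.

import numpy

p0 = numpy.array([[0.1, 0.6, 0.2, 0.1]])   # model 0, 4-word vocabulary
p1 = numpy.array([[0.2, 0.4, 0.3, 0.1]])   # model 1

log_probs = (numpy.log(p0) + numpy.log(p1)) / 2.0
print numpy.exp(log_probs)                  # elementwise sqrt(p0 * p1)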
def search_with_truth(self, seq, truth, n_samples, ignore_unk=False,
                      minlen=1, idict=None):
    # Print the reference translation for side-by-side inspection.
    for ww in truth:
        print idict[ww],
    print ''

    c = self.comp_repr(seq)[0]
    # one representation at each encoding time step
    # c.shape[0] = len(seq)

    # states is a dim-dimensional vector, output of the initialization unit
    states = map(lambda x: x[None, :], self.comp_init_states(c))
    # dimension of hidden layer
    dim = states[0].shape[1]

    # always 1 in case of non-deep GRU
    num_levels = len(states)

    fin_trans = []
    fin_costs = []

    trans = [[]]
    costs = [0.0]

    # maximum translation length allowed is 3*len(source)
    for k in range(3 * len(seq)):
        if n_samples == 0:
            # all translations ended
            break

        # Compute probabilities of the next words for
        # all the elements of the beam.
        beam_size = len(trans)
        last_words = (numpy.array(map(lambda t: t[-1], trans))
                      if k > 0
                      else numpy.zeros(beam_size, dtype="int64"))
        log_probs = numpy.log(self.comp_next_probs(c, k, last_words, *states)[0])

        # Print the reference word next to the current beam contents.
        print str(k) + '\t' + '|',
        if k > 0 and k <= len(truth):
            # 30000: hard-coded target vocabulary size
            if truth[k - 1] < 30000:
                print idict[truth[k - 1]] + '\t' + '|',
            else:
                print '<EOS>' + '\t',
            for ww in last_words:
                print idict[ww] + ' ',
            print ''
        else:
            print last_words

        # Adjust log probs according to search restrictions
        if ignore_unk:
            log_probs[:, self.unk_id] = -numpy.inf
        # TODO: report me in the paper!!!
        if k < minlen:
            log_probs[:, self.eos_id] = -numpy.inf

        # Find the best options by calling argpartition on the flattened array
        next_costs = numpy.array(costs)[:, None] - log_probs
        flat_next_costs = next_costs.flatten()
        best_costs_indices = argpartition(flat_next_costs, n_samples)[:n_samples]

        # Decipher the flattened indices
        voc_size = log_probs.shape[1]
        # which beam?
        trans_indices = best_costs_indices / voc_size
        # which word?
        word_indices = best_costs_indices % voc_size
        costs = flat_next_costs[best_costs_indices]

        # Form a beam for the next iteration
        new_trans = [[]] * n_samples
        new_costs = numpy.zeros(n_samples)
        new_states = [numpy.zeros((n_samples, dim), dtype="float32")
                      for level in range(num_levels)]
        inputs = numpy.zeros(n_samples, dtype="int64")
        for i, (orig_idx, next_word, next_cost) in enumerate(
                zip(trans_indices, word_indices, costs)):
            new_trans[i] = trans[orig_idx] + [next_word]
            new_costs[i] = next_cost
            for level in range(num_levels):
                new_states[level][i] = states[level][orig_idx]
            inputs[i] = next_word
        new_states = self.comp_next_states(c, k, inputs, *new_states)

        # Filter the sequences that end with the end-of-sequence character
        trans = []
        costs = []
        indices = []
        for i in range(n_samples):
            if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                trans.append(new_trans[i])
                costs.append(new_costs[i])
                indices.append(i)
            else:
                n_samples -= 1
                fin_trans.append(new_trans[i])
                fin_costs.append(new_costs[i])
        # beam size is naturally reduced when multiple best
        # new trans came from the same beam
        states = map(lambda x: x[indices], new_states)

    # Dirty tricks to obtain any translation
    if not len(fin_trans):
        if ignore_unk:
            logger.warning("Did not manage without UNK")
            return self.search_with_truth(seq, truth, n_samples, False, minlen, idict)
        elif n_samples < 500:
            logger.warning("Still no translations: try beam size {}".format(n_samples * 2))
            return self.search_with_truth(seq, truth, n_samples * 2, False, minlen, idict)
        else:
            logger.error("Translation failed")

    fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
    fin_costs = numpy.array(sorted(fin_costs))
    return fin_trans, fin_costs
def search(self, seq, n_samples, ignore_unk=False, minlen=1):
    print seq
    # Visualize the encoder's read/write attention weights.
    rw, ww = self.enc_dec.view_encoder_weight()(seq)
    visual2d(rw)
    visual2d(ww)

    c, m = self.comp_repr(seq)
    visual2d(m[-1])
    print numpy.abs(c).sum(axis=1)
    print numpy.abs(m).sum(axis=2).sum(axis=1)

    states = map(lambda x: x[None, :], self.comp_init_states(c))
    mem = m[-1:]
    dim = states[0].shape[1]

    num_levels = len(states)

    fin_trans = []
    fin_costs = []

    trans = [[]]
    costs = [0.0]
    derw = []
    deww = []

    for k in range(3 * len(seq)):
        if n_samples == 0:
            break

        # Compute probabilities of the next words for
        # all the elements of the beam.
        beam_size = len(trans)
        last_words = (numpy.array(map(lambda t: t[-1], trans))
                      if k > 0
                      else numpy.zeros(beam_size, dtype="int64"))
        log_probs = numpy.log(self.comp_next_probs(c, k, last_words, mem, *states)[0])

        # Adjust log probs according to search restrictions
        if ignore_unk:
            log_probs[:, self.unk_id] = -numpy.inf
        # TODO: report me in the paper!!!
        if k < minlen:
            log_probs[:, self.eos_id] = -numpy.inf

        # Find the best options by calling argpartition on the flattened array
        next_costs = numpy.array(costs)[:, None] - log_probs
        flat_next_costs = next_costs.flatten()
        best_costs_indices = argpartition(flat_next_costs, n_samples)[:n_samples]

        # Decipher the flattened indices
        voc_size = log_probs.shape[1]
        trans_indices = best_costs_indices / voc_size
        word_indices = best_costs_indices % voc_size
        costs = flat_next_costs[best_costs_indices]

        # Form a beam for the next iteration
        new_trans = [[]] * n_samples
        new_costs = numpy.zeros(n_samples)
        new_states = [numpy.zeros((n_samples, dim), dtype="float32")
                      for level in range(num_levels)]
        new_mem = numpy.zeros((n_samples, mem.shape[1], mem.shape[2]),
                              dtype="float32")
        inputs = numpy.zeros(n_samples, dtype="int64")
        for i, (orig_idx, next_word, next_cost) in enumerate(
                zip(trans_indices, word_indices, costs)):
            new_trans[i] = trans[orig_idx] + [next_word]
            new_costs[i] = next_cost
            for level in range(num_levels):
                new_states[level][i] = states[level][orig_idx]
            inputs[i] = next_word
            new_mem[i] = mem[orig_idx]

        # Record the decoder's read/write weights for later visualization.
        h, mem, rw, ww = self.comp_next_debug(c, k, inputs, mem, *states)
        print h.shape, mem.shape, rw.shape, ww.shape
        derw.append(rw[0])
        deww.append(ww[0])

        result = self.comp_next_states(c, k, inputs, new_mem, *new_states)
        new_states = [result[0]]
        new_mem = result[1]
        print new_mem.shape

        # Filter the sequences that end with the end-of-sequence character
        trans = []
        costs = []
        indices = []
        for i in range(n_samples):
            if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                trans.append(new_trans[i])
                costs.append(new_costs[i])
                indices.append(i)
            else:
                n_samples -= 1
                fin_trans.append(new_trans[i])
                fin_costs.append(new_costs[i])
        states = map(lambda x: x[indices], new_states)
        mem = new_mem[indices]

    print '--decoder weight--'
    visual2d(numpy.asarray(derw))
    visual2d(numpy.asarray(deww))

    # Dirty tricks to obtain any translation
    if not len(fin_trans):
        if ignore_unk:
            logger.warning("Did not manage without UNK")
            return self.search(seq, n_samples, False, minlen)
        elif n_samples < 500:
            logger.warning("Still no translations: try beam size {}".format(n_samples * 2))
            return self.search(seq, n_samples * 2, False, minlen)
        else:
            logger.error("Translation failed")

    fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
    fin_costs = numpy.array(sorted(fin_costs))
    return fin_trans, fin_costs
def search(self, sen, seq, n_samples, ignore_unk=False, minlen=1):
    src_seq = sen.split(' ')
    # Candidate target words for translating source UNKs.
    uni_trans_set = self.get_uni_trans(src_seq)

    c = self.comp_repr(seq)[0]
    states = map(lambda x: x[None, :], self.comp_init_states(c))
    dim = states[0].shape[1]

    num_levels = len(states)

    fin_trans = []
    fin_costs = []
    fin_str_trans = []
    fin_aligns = []
    fin_lm_costs = []
    fin_tm_costs = []
    fin_rnn_costs = []
    fin_unk_nums = []

    trans = [[]]
    costs = [0.0]
    str_trans = [[]]
    lm_costs = [[]]
    tm_costs = [[]]
    rnn_costs = [[]]
    unk_nums = [[]]
    aligns = [[]]

    for k in range(3 * len(seq)):
        if n_samples == 0:
            break

        # Compute probabilities of the next words for
        # all the elements of the beam.
        beam_size = len(trans)
        last_words = (numpy.array(map(lambda t: t[-1], trans))
                      if k > 0
                      else numpy.zeros(beam_size, dtype="int64"))
        next_probs, aln_score_mat = self.comp_next_probs(c, k, last_words, *states)
        log_probs = numpy.log(next_probs)

        # Adjust log probs according to search restrictions
        if ignore_unk:
            log_probs[:, self.unk_id] = -numpy.inf
        if k < minlen:
            log_probs[:, self.eos_id] = -numpy.inf

        # Find the best options by calling argpartition on the flattened array.
        # Keep 100x the beam as candidates, since SMT features may reorder them.
        next_costs = numpy.array(costs)[:, None] - log_probs * self.weight_rnn
        flat_next_costs = next_costs.flatten()
        cands_costs_indices = argpartition(flat_next_costs,
                                           n_samples * 100)[:n_samples * 100]

        # Decipher the flattened indices
        voc_size = log_probs.shape[1]
        cands_trans_indices = cands_costs_indices / voc_size
        cands_word_indices = cands_costs_indices % voc_size
        cands_costs = flat_next_costs[cands_costs_indices]
        cands_lm_costs = numpy.zeros(len(cands_costs))
        cands_tm_costs = numpy.zeros(len(cands_costs))
        cands_unk_nums = numpy.zeros(len(cands_costs))
        cands_rnn_costs = (-1 * log_probs).flatten()[cands_costs_indices]
        unk_trans = {}

        # Add SMT feature scores to the costs
        for i, (orig_idx, next_word, next_cost) in enumerate(
                zip(cands_trans_indices, cands_word_indices, cands_costs)):
            # Keep the three source positions with the highest attention.
            sorted_aln_idx = numpy.argsort(-aln_score_mat[:, orig_idx])[:3]
            aln_score_array = []
            for aln_idx in sorted_aln_idx:
                aln_score_array.append([aln_idx, aln_score_mat[aln_idx, orig_idx]])
            lm_score = -self.get_lm_score(str_trans[orig_idx] + self.trg_i2w([next_word]))
            tm_score, unk_tm_num, _ = self.get_tm_score_new(
                src_seq, aln_score_array, self.trg_i2w([next_word])[0])
            tm_score = -tm_score
            if next_word == self.unk_id:
                # Try to replace UNK with the best-scoring candidate translation.
                unk_trans[orig_idx] = 'UNK'
                for t in uni_trans_set:
                    _ls = -self.get_lm_score(str_trans[orig_idx] + [t])
                    _ts, _unk_tm_num, match_idx = self.get_tm_score_new(
                        src_seq, aln_score_array, t)
                    _ts = -_ts
                    if match_idx == 0 and \
                            _ls * self.weight_lm + _ts * self.weight_tm < \
                            lm_score * self.weight_lm + tm_score * self.weight_tm:
                        lm_score = _ls
                        tm_score = _ts
                        unk_tm_num = _unk_tm_num
                        unk_trans[orig_idx] = t
            cands_costs[i] += lm_score * self.weight_lm + \
                tm_score * self.weight_tm + self.weight_wp
            cands_lm_costs[i] = lm_score
            cands_tm_costs[i] = tm_score
            cands_unk_nums[i] = unk_tm_num

        best_costs_indices = argpartition(cands_costs, n_samples)[:n_samples]
        trans_indices = cands_trans_indices[best_costs_indices]
        word_indices = cands_word_indices[best_costs_indices]
        costs = cands_costs[best_costs_indices]
        _lm_costs = cands_lm_costs[best_costs_indices]
        _tm_costs = cands_tm_costs[best_costs_indices]
        _unk_nums = cands_unk_nums[best_costs_indices]
        _rnn_costs = cands_rnn_costs[best_costs_indices]

        # Form a beam for the next iteration
        new_trans = [[]] * n_samples
        new_str_trans = [[]] * n_samples
        new_costs = numpy.zeros(n_samples)
        new_lm_costs = [[]] * n_samples
        new_tm_costs = [[]] * n_samples
        new_rnn_costs = [[]] * n_samples
        new_unk_nums = [[]] * n_samples
        new_aligns = [[]] * n_samples
        new_states = [numpy.zeros((n_samples, dim), dtype="float32")
                      for level in range(num_levels)]
        inputs = numpy.zeros(n_samples, dtype="int64")
        for i, (orig_idx, next_word, next_cost, _lm_score, _tm_score,
                _unk_num, _rnn_cost) in enumerate(
                    zip(trans_indices, word_indices, costs,
                        _lm_costs, _tm_costs, _unk_nums, _rnn_costs)):
            new_trans[i] = trans[orig_idx] + [next_word]
            if next_word == self.unk_id:
                new_str_trans[i] = str_trans[orig_idx] + [unk_trans[orig_idx]]
            else:
                new_str_trans[i] = str_trans[orig_idx] + self.trg_i2w([next_word])
            new_costs[i] = next_cost
            new_lm_costs[i] = lm_costs[orig_idx] + [_lm_score]
            new_tm_costs[i] = tm_costs[orig_idx] + [_tm_score]
            new_unk_nums[i] = unk_nums[orig_idx] + [_unk_num]
            new_rnn_costs[i] = rnn_costs[orig_idx] + [_rnn_cost]
            new_aligns[i] = aligns[orig_idx] + [aln_score_mat[:, orig_idx]]
            for level in range(num_levels):
                new_states[level][i] = states[level][orig_idx]
            inputs[i] = next_word
        new_states = self.comp_next_states(c, k, inputs, *new_states)

        # Filter the sequences that end with the end-of-sequence character
        trans = []
        costs = []
        str_trans = []
        lm_costs = []
        tm_costs = []
        unk_nums = []
        rnn_costs = []
        aligns = []
        indices = []
        for i in range(n_samples):
            if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                trans.append(new_trans[i])
                costs.append(new_costs[i])
                str_trans.append(new_str_trans[i])
                lm_costs.append(new_lm_costs[i])
                tm_costs.append(new_tm_costs[i])
                rnn_costs.append(new_rnn_costs[i])
                unk_nums.append(new_unk_nums[i])
                aligns.append(new_aligns[i])
                indices.append(i)
            else:
                n_samples -= 1
                fin_trans.append(new_trans[i])
                fin_costs.append(new_costs[i])
                fin_str_trans.append(new_str_trans[i])
                fin_lm_costs.append(new_lm_costs[i])
                fin_tm_costs.append(new_tm_costs[i])
                fin_rnn_costs.append(new_rnn_costs[i])
                fin_unk_nums.append(new_unk_nums[i])
                fin_aligns.append(new_aligns[i])
        states = map(lambda x: x[indices], new_states)

    if not len(fin_trans):
        if ignore_unk:
            logger.warning("Did not manage without UNK")
            return self.search(sen, seq, n_samples, False, minlen)
        elif n_samples < 50:
            logger.warning("Still no translations: try beam size {}".format(n_samples * 2))
            return self.search(sen, seq, n_samples * 2, False, minlen)
        elif n_samples < 100:
            logger.warning("Still no translations: try beam size {}, and --ignore UNK".format(n_samples))
            return self.search(sen, seq, n_samples, True, minlen)
        else:
            logger.error("Cannot find translations, returning an unreliable result")
            fin_trans = trans
            fin_str_trans = str_trans
            fin_costs = costs
            fin_lm_costs = lm_costs
            fin_tm_costs = tm_costs
            fin_rnn_costs = rnn_costs
            fin_unk_nums = unk_nums
            fin_aligns = aligns

    order = numpy.argsort(fin_costs)
    fin_trans = numpy.array(fin_trans)[order]
    fin_aligns = numpy.array(fin_aligns)[order]
    fin_str_trans = numpy.array(fin_str_trans)[order]
    fin_tm_costs = numpy.array(fin_tm_costs)[order]
    fin_lm_costs = numpy.array(fin_lm_costs)[order]
    fin_unk_nums = numpy.array(fin_unk_nums)[order]
    fin_rnn_costs = numpy.array(fin_rnn_costs)[order]
    fin_costs = numpy.array(sorted(fin_costs))
    return fin_trans, fin_costs, fin_aligns, fin_lm_costs, fin_tm_costs, \
        fin_str_trans, fin_unk_nums, fin_rnn_costs
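# The per-candidate cost above is a log-linear combination of feature scores.
# A minimal sketch with made-up scores and weights; the weight_* names mirror
# the attributes used above, and weight_wp acts as a per-word penalty/bonus.

rnn_cost, lm_cost, tm_cost = 2.3, 4.1, 3.7           # negative log scores
weight_rnn, weight_lm, weight_tm, weight_wp = 1.0, 0.5, 0.8, -0.2

cost = rnn_cost * weight_rnn + lm_cost * weight_lm + \
    tm_cost * weight_tm + weight_wp
print cost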
def search(self, seq, n_samples, ignore_unk=False, minlen=1, getRep=False):
    cdata = self.comp_repr(seq)
    c = cdata[0]
    forward_rester = cdata[1]
    forward_updater = cdata[2]
    backward_rester = cdata[3]
    backward_updater = cdata[4]

    # Per-position trends of the maximum gate activations
    # (reset and update gates of the forward/backward encoders).
    max_forward_rester = numpy.amax(forward_rester, axis=1)
    max_backward_rester = numpy.amax(backward_rester, axis=1)
    max_forward_updater = numpy.amax(forward_updater, axis=1)
    max_backward_updater = numpy.amax(backward_updater, axis=1)

    for_retend = []
    back_retend = []
    for_uptend = []
    back_uptend = []
    for i in range(0, max_forward_rester.shape[0] - 1):
        for_retend.append(max_forward_rester[i + 1] - max_forward_rester[i])
    for i in range(0, max_backward_rester.shape[0] - 1):
        back_retend.append(max_backward_rester[i] - max_backward_rester[i + 1])
    for_retend = numpy.array(for_retend)
    back_retend = numpy.array(back_retend)
    for i in range(0, max_forward_updater.shape[0] - 1):
        for_uptend.append(max_forward_updater[i + 1] - max_forward_updater[i])
    for i in range(0, max_backward_updater.shape[0] - 1):
        back_uptend.append(max_backward_updater[i] - max_backward_updater[i + 1])
    for_uptend = numpy.array(for_uptend)
    back_uptend = numpy.array(back_uptend)
    print (for_retend + back_retend) / 2.0
    print (for_uptend + back_uptend) / 2.0
    print "----------------------------------------------"

    # Per-position trends of the average gate activations.
    avg_forward_rester = numpy.sum(forward_rester, axis=1) / forward_rester.shape[1]
    avg_backward_rester = numpy.sum(backward_rester, axis=1) / backward_rester.shape[1]
    avg_forward_updater = numpy.sum(forward_updater, axis=1) / forward_updater.shape[1]
    avg_backward_updater = numpy.sum(backward_updater, axis=1) / backward_updater.shape[1]

    for_retend = []
    back_retend = []
    for_uptend = []
    back_uptend = []
    for i in range(0, avg_forward_rester.shape[0] - 1):
        for_retend.append(avg_forward_rester[i + 1] - avg_forward_rester[i])
    for i in range(0, avg_backward_rester.shape[0] - 1):
        back_retend.append(avg_backward_rester[i] - avg_backward_rester[i + 1])
    for_retend = numpy.array(for_retend)
    back_retend = numpy.array(back_retend)
    for i in range(0, avg_forward_updater.shape[0] - 1):
        for_uptend.append(avg_forward_updater[i + 1] - avg_forward_updater[i])
    for i in range(0, avg_backward_updater.shape[0] - 1):
        back_uptend.append(avg_backward_updater[i] - avg_backward_updater[i + 1])
    for_uptend = numpy.array(for_uptend)
    back_uptend = numpy.array(back_uptend)
    print (for_retend + back_retend) / 2.0
    print (for_uptend + back_uptend) / 2.0

    if getRep:
        return c

    states = map(lambda x: x[None, :], self.comp_init_states(c))
    dim = states[0].shape[1]

    num_levels = len(states)

    fin_trans = []
    fin_costs = []
    fin_align = []

    trans = [[]]
    costs = [0.0]
    dec_rester = [[]] * n_samples
    dec_updater = [[]] * n_samples
    fin_dec_rester = []
    fin_dec_updater = []
    # Seed each alignment matrix with an all-zero row (dropped at the end).
    align = []
    for i in range(n_samples):
        align.append(numpy.array([numpy.zeros(len(seq))]))

    for k in range(3 * len(seq)):
        if n_samples == 0:
            break

        # Compute probabilities of the next words for
        # all the elements of the beam.
        beam_size = len(trans)
        last_words = (numpy.array(map(lambda t: t[-1], trans))
                      if k > 0
                      else numpy.zeros(beam_size, dtype="int64"))
        ans = self.comp_next_probs(c, k, last_words, *states)
        probs = ans[0]
        alignments = ans[1]
        log_probs = numpy.log(probs)
        # Average the decoder gate activations over the hidden dimension.
        trester = ans[2]
        tupdater = ans[3]
        trester = numpy.sum(trester, axis=1) / trester.shape[1]
        tupdater = numpy.sum(tupdater, axis=1) / tupdater.shape[1]

        # Adjust log probs according to search restrictions
        if ignore_unk:
            log_probs[:, self.unk_id] = -numpy.inf
        # TODO: report me in the paper!!!
        if k < minlen:
            log_probs[:, self.eos_id] = -numpy.inf

        # Find the best options by calling argpartition on the flattened array
        next_costs = numpy.array(costs)[:, None] - log_probs
        flat_next_costs = next_costs.flatten()
        best_costs_indices = argpartition(flat_next_costs, n_samples)[:n_samples]

        # Decipher the flattened indices
        voc_size = log_probs.shape[1]
        trans_indices = best_costs_indices / voc_size
        word_indices = best_costs_indices % voc_size
        costs = flat_next_costs[best_costs_indices]

        # Form a beam for the next iteration
        new_rester = [[]] * n_samples
        new_updater = [[]] * n_samples
        new_align = [[]] * n_samples
        new_trans = [[]] * n_samples
        new_costs = numpy.zeros(n_samples)
        new_states = [numpy.zeros((n_samples, dim), dtype="float32")
                      for level in range(num_levels)]
        inputs = numpy.zeros(n_samples, dtype="int64")
        for i, (orig_idx, next_word, next_cost) in enumerate(
                zip(trans_indices, word_indices, costs)):
            new_trans[i] = trans[orig_idx] + [next_word]
            new_costs[i] = next_cost
            new_align[i] = numpy.concatenate(
                (align[orig_idx], [alignments[:, orig_idx]]), axis=0)
            new_rester[i] = dec_rester[orig_idx] + [trester[orig_idx]]
            new_updater[i] = dec_updater[orig_idx] + [tupdater[orig_idx]]
            for level in range(num_levels):
                new_states[level][i] = states[level][orig_idx]
            inputs[i] = next_word
        new_states = self.comp_next_states(c, k, inputs, *new_states)

        # Filter the sequences that end with the end-of-sequence character
        trans = []
        costs = []
        indices = []
        align = []
        dec_rester = []
        dec_updater = []
        for i in range(n_samples):
            if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                trans.append(new_trans[i])
                costs.append(new_costs[i])
                align.append(new_align[i])
                dec_rester.append(new_rester[i])
                dec_updater.append(new_updater[i])
                indices.append(i)
            else:
                n_samples -= 1
                fin_trans.append(new_trans[i])
                fin_costs.append(new_costs[i])
                fin_align.append(new_align[i])
                fin_dec_rester.append(new_rester[i])
                fin_dec_updater.append(new_updater[i])
        states = map(lambda x: x[indices], new_states)

    # Drop the all-zero row used to seed each alignment matrix.
    for i in range(len(fin_align)):
        talign = fin_align[i]
        fin_align[i] = talign[1:, :]

    # Dirty tricks to obtain any translation
    if not len(fin_trans):
        if ignore_unk:
            logger.warning("Did not manage without UNK")
            return self.search(seq, n_samples, False, minlen)
        elif n_samples < 500:
            logger.warning("Still no translations: try beam size {}".format(n_samples * 2))
            return self.search(seq, n_samples * 2, False, minlen)
        else:
            logger.error("Translation failed")

    tfin_align = []
    index = numpy.argsort(fin_costs)
    for i in range(0, len(index)):
        tfin_align.append(fin_align[index[i]])
    fin_dec_rester = numpy.array(fin_dec_rester)[numpy.argsort(fin_costs)]
    fin_dec_updater = numpy.array(fin_dec_updater)[numpy.argsort(fin_costs)]
    fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
    fin_costs = numpy.array(sorted(fin_costs))
    return fin_trans, fin_costs, tfin_align, fin_dec_rester, fin_dec_updater
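# Toy illustration of the trend computation above, with made-up per-position
# maxima of the gate activations. The explicit loops above are equivalent to
# numpy.diff (with a sign flip for the backward encoder).

import numpy

max_forward = numpy.array([0.2, 0.35, 0.3, 0.5, 0.45])
max_backward = numpy.array([0.6, 0.4, 0.45, 0.3, 0.2])
for_trend = numpy.diff(max_forward)       # f[i+1] - f[i]
back_trend = -numpy.diff(max_backward)    # b[i] - b[i+1]
print (for_trend + back_trend) / 2.0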
def search(self, seq, n_samples, ignore_unk=False, minlen=1):
    c = self.comp_repr(seq)[0]
    states = map(lambda x: x[None, :], self.comp_init_states(c))
    dim = states[0].shape[1]

    # added by Zhaopeng Tu, 2015-11-02
    if self.enc_dec.state['maintain_coverage']:
        coverage_dim = self.enc_dec.state['coverage_dim']
        if self.enc_dec.state['use_linguistic_coverage'] and \
                self.enc_dec.state['coverage_accumulated_operation'] == 'subtractive':
            coverages = numpy.ones((c.shape[0], 1, coverage_dim), dtype='float32')
        else:
            coverages = numpy.zeros((c.shape[0], 1, coverage_dim), dtype='float32')
        fin_coverages = []
    else:
        coverages = None

    if self.enc_dec.state['maintain_coverage'] and \
            self.enc_dec.state['use_linguistic_coverage'] and \
            self.enc_dec.state['use_fertility_model']:
        fertility = self.comp_fert(c)
    else:
        fertility = None

    num_levels = len(states)

    fin_trans = []
    fin_costs = []
    fin_aligns = []

    trans = [[]]
    aligns = [[]]
    costs = [0.0]

    for k in range(3 * len(seq)):
        if n_samples == 0:
            break

        # Compute probabilities of the next words for
        # all the elements of the beam.
        beam_size = len(trans)
        last_words = (numpy.array(map(lambda t: t[-1], trans))
                      if k > 0
                      else numpy.zeros(beam_size, dtype="int64"))
        results = self.comp_next_probs(c, k, last_words, *states,
                                       coverage_before=coverages,
                                       fertility=fertility)
        log_probs = numpy.log(results[0])
        # alignment shape: (source_len, beam_size)
        alignment = results[1]

        # Adjust log probs according to search restrictions
        if ignore_unk:
            log_probs[:, self.unk_id] = -numpy.inf
        # TODO: report me in the paper!!!
        if k < minlen:
            log_probs[:, self.eos_id] = -numpy.inf

        # Find the best options by calling argpartition on the flattened array
        next_costs = numpy.array(costs)[:, None] - log_probs
        flat_next_costs = next_costs.flatten()
        best_costs_indices = argpartition(flat_next_costs, n_samples)[:n_samples]

        # Decipher the flattened indices
        voc_size = log_probs.shape[1]
        trans_indices = best_costs_indices / voc_size
        word_indices = best_costs_indices % voc_size
        costs = flat_next_costs[best_costs_indices]

        # Form a beam for the next iteration
        new_trans = [[]] * n_samples
        new_aligns = [[]] * n_samples
        new_costs = numpy.zeros(n_samples)
        new_states = [numpy.zeros((n_samples, dim), dtype="float32")
                      for level in range(num_levels)]
        inputs = numpy.zeros(n_samples, dtype="int64")
        if self.enc_dec.state['maintain_coverage']:
            new_coverages = numpy.zeros((c.shape[0], n_samples, coverage_dim),
                                        dtype='float32')
        else:
            new_coverages = None
        for i, (orig_idx, next_word, next_cost) in enumerate(
                zip(trans_indices, word_indices, costs)):
            new_trans[i] = trans[orig_idx] + [next_word]
            # alignment shape: (source_len, beam_size)
            new_aligns[i] = aligns[orig_idx] + [alignment[:, orig_idx]]
            new_costs[i] = next_cost
            for level in range(num_levels):
                new_states[level][i] = states[level][orig_idx]
            inputs[i] = next_word
            if self.enc_dec.state['maintain_coverage']:
                new_coverages[:, i, :] = coverages[:, orig_idx, :]
        new_states = self.comp_next_states(c, k, inputs, *new_states,
                                           coverage_before=new_coverages,
                                           fertility=fertility)
        if self.enc_dec.state['maintain_coverage']:
            new_coverages = new_states[-1]
            new_states = new_states[:-1]

        # Filter the sequences that end with the end-of-sequence character
        trans = []
        aligns = []
        costs = []
        indices = []
        for i in range(n_samples):
            if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                trans.append(new_trans[i])
                aligns.append(new_aligns[i])
                costs.append(new_costs[i])
                indices.append(i)
            else:
                n_samples -= 1
                fin_trans.append(new_trans[i])
                fin_aligns.append(new_aligns[i])
                fin_costs.append(new_costs[i])
                if self.enc_dec.state['maintain_coverage']:
                    fin_coverages.append(new_coverages[:, i, 0])
        states = map(lambda x: x[indices], new_states)
        if self.enc_dec.state['maintain_coverage']:
            coverages = numpy.zeros((c.shape[0], n_samples, coverage_dim),
                                    dtype='float32')
            for i in xrange(n_samples):
                coverages[:, i, :] = new_coverages[:, indices[i], :]

    # Dirty tricks to obtain any translation
    if not len(fin_trans):
        if ignore_unk:
            logger.warning("Did not manage without UNK")
            return self.search(seq, n_samples, False, minlen)
        elif n_samples < 100:
            logger.warning("Still no translations: try beam size {}".format(n_samples * 2))
            return self.search(seq, n_samples * 2, False, minlen)
        else:
            # Fall back to the unfinished hypotheses.
            fin_trans = trans
            fin_aligns = aligns
            fin_costs = costs
            if self.enc_dec.state['maintain_coverage']:
                fin_coverages = coverages[:, :, 0].transpose().tolist()
            logger.error("Translation failed")

    fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
    fin_aligns = numpy.array(fin_aligns)[numpy.argsort(fin_costs)]
    if self.enc_dec.state['maintain_coverage']:
        fin_coverages = numpy.array(fin_coverages)[numpy.argsort(fin_costs)]
    fin_costs = numpy.array(sorted(fin_costs))

    if self.enc_dec.state['maintain_coverage']:
        if self.enc_dec.state['use_linguistic_coverage'] and \
                self.enc_dec.state['use_fertility_model']:
            return fin_trans, fin_aligns, fin_costs, fin_coverages, fertility
        else:
            return fin_trans, fin_aligns, fin_costs, fin_coverages
    else:
        return fin_trans, fin_aligns, fin_costs
def search(self, seqin, seq, n_samples, ignore_unk=False, minlen=1):
    src_seq = seqin.split(' ')
    c = self.comp_repr(seq)[0]
    states = map(lambda x: x[None, :], self.comp_init_states(c))
    dim = states[0].shape[1]

    num_levels = len(states)

    fin_trans = []
    fin_costs = []
    fin_aligns = []

    trans = [[]]
    costs = [0.0]
    aligns = [[]]

    for k in range(3 * len(seq)):
        if n_samples == 0:
            break

        # Compute probabilities of the next words for
        # all the elements of the beam.
        beam_size = len(trans)
        last_words = (numpy.array(map(lambda t: t[-1], trans))
                      if k > 0
                      else numpy.zeros(beam_size, dtype="int64"))
        next_probs, aln_score_mat = self.comp_next_probs(c, k, last_words, *states)
        log_probs = numpy.log(next_probs)

        # Adjust log probs according to search restrictions
        if ignore_unk:
            log_probs[:, self.unk_id] = -numpy.inf
        if k < minlen:
            log_probs[:, self.eos_id] = -numpy.inf

        # Find the best options by calling argpartition on the flattened array.
        # Keep 100x the beam as candidates, since SMT features may reorder them.
        next_costs = numpy.array(costs)[:, None] - log_probs * self.rnn_weight
        flat_next_costs = next_costs.flatten()
        cands_costs_indices = argpartition(flat_next_costs,
                                           n_samples * 100)[:n_samples * 100]

        # Decipher the flattened indices
        voc_size = log_probs.shape[1]
        cands_trans_indices = cands_costs_indices / voc_size
        cands_word_indices = cands_costs_indices % voc_size
        cands_costs = flat_next_costs[cands_costs_indices]

        # Add SMT feature scores to the costs
        for i, (orig_idx, next_word, next_cost) in enumerate(
                zip(cands_trans_indices, cands_word_indices, cands_costs)):
            lm_score = self.get_lm_score(trans[orig_idx] + [next_word])
            tm_score = self.get_tm_score(src_seq, aln_score_mat[:, orig_idx],
                                         self.trg_i2w([next_word])[0])
            cands_costs[i] += -1.0 * lm_score + -1.0 * tm_score

        best_costs_indices = argpartition(cands_costs, n_samples)[:n_samples]
        trans_indices = cands_trans_indices[best_costs_indices]
        word_indices = cands_word_indices[best_costs_indices]
        costs = cands_costs[best_costs_indices]

        # Form a beam for the next iteration
        new_trans = [[]] * n_samples
        new_costs = numpy.zeros(n_samples)
        new_aligns = [[]] * n_samples
        new_states = [numpy.zeros((n_samples, dim), dtype="float32")
                      for level in range(num_levels)]
        inputs = numpy.zeros(n_samples, dtype="int64")
        for i, (orig_idx, next_word, next_cost) in enumerate(
                zip(trans_indices, word_indices, costs)):
            new_trans[i] = trans[orig_idx] + [next_word]
            new_costs[i] = next_cost
            new_aligns[i] = aligns[orig_idx] + [aln_score_mat[:, orig_idx]]
            for level in range(num_levels):
                new_states[level][i] = states[level][orig_idx]
            inputs[i] = next_word
        new_states = self.comp_next_states(c, k, inputs, *new_states)

        # Filter the sequences that end with the end-of-sequence character
        trans = []
        costs = []
        aligns = []
        indices = []
        for i in range(n_samples):
            if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                trans.append(new_trans[i])
                costs.append(new_costs[i])
                aligns.append(new_aligns[i])
                indices.append(i)
            else:
                n_samples -= 1
                fin_trans.append(new_trans[i])
                fin_costs.append(new_costs[i])
                fin_aligns.append(new_aligns[i])
        states = map(lambda x: x[indices], new_states)

    if not len(fin_trans):
        if ignore_unk:
            logger.warning("Did not manage without UNK")
            return self.search(seqin, seq, n_samples, False, minlen)
        elif n_samples < 100:
            logger.warning("Still no translations: try beam size {}".format(n_samples * 2))
            return self.search(seqin, seq, n_samples * 2, False, minlen)
        else:
            logger.error("Translation failed")

    fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
    fin_aligns = numpy.array(fin_aligns)[numpy.argsort(fin_costs)]
    fin_costs = numpy.array(sorted(fin_costs))
    return fin_trans, fin_costs, fin_aligns
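# Hypothetical usage sketch for the variant above (names are assumptions):
# `sampler` is an instance of the class these methods belong to, `seqin` the
# raw source sentence, and `seq` its word-id sequence.

fin_trans, fin_costs, fin_aligns = sampler.search(seqin, seq, 10)
best_trans = fin_trans[0]     # hypotheses are sorted by increasing cost
best_align = fin_aligns[0]    # per-step attention over source positions
print best_trans, fin_costs[0]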