def __forward(self, is_training, src_batch, trg_batch=None, generation_limit=None):
    m = self.__model
    tanh = functions.tanh
    lstm = functions.lstm
    batch_size = len(src_batch)
    src_len = len(src_batch[0])
    src_stoi = self.__src_vocab.stoi
    trg_stoi = self.__trg_vocab.stoi
    trg_itos = self.__trg_vocab.itos
    s_c = wrapper.zeros((batch_size, self.__n_hidden))  # LSTM cell state

    # encoding: read the source sentence in reverse order, starting from '</s>'
    s_x = wrapper.make_var([src_stoi('</s>') for _ in range(batch_size)], dtype=np.int32)
    s_i = tanh(m.w_xi(s_x))
    s_c, s_p = lstm(s_c, m.w_ip(s_i))
    for l in reversed(range(src_len)):
        s_x = wrapper.make_var([src_stoi(src_batch[k][l]) for k in range(batch_size)], dtype=np.int32)
        s_i = tanh(m.w_xi(s_x))
        s_c, s_p = lstm(s_c, m.w_ip(s_i) + m.w_pp(s_p))

    # hand the final encoder state s_p over to the decoder state s_q
    s_c, s_q = lstm(s_c, m.w_pq(s_p))
    hyp_batch = [[] for _ in range(batch_size)]

    # decoding
    if is_training:
        # teacher forcing: feed the reference word and accumulate cross-entropy loss
        accum_loss = wrapper.zeros(())
        trg_len = len(trg_batch[0])
        for l in range(trg_len):
            s_j = tanh(m.w_qj(s_q))
            r_y = m.w_jy(s_j)
            s_t = wrapper.make_var([trg_stoi(trg_batch[k][l]) for k in range(batch_size)], dtype=np.int32)
            accum_loss += functions.softmax_cross_entropy(r_y, s_t)
            output = wrapper.get_data(r_y).argmax(1)
            for k in range(batch_size):
                hyp_batch[k].append(trg_itos(output[k]))
            s_c, s_q = lstm(s_c, m.w_yq(s_t) + m.w_qq(s_q))
        return hyp_batch, accum_loss
    else:
        # greedy generation: feed back the predicted word until every sentence
        # has emitted '</s>' or the length limit is reached
        while len(hyp_batch[0]) < generation_limit:
            s_j = tanh(m.w_qj(s_q))
            r_y = m.w_jy(s_j)
            output = wrapper.get_data(r_y).argmax(1)
            for k in range(batch_size):
                hyp_batch[k].append(trg_itos(output[k]))
            if all(hyp_batch[k][-1] == '</s>' for k in range(batch_size)):
                break
            s_y = wrapper.make_var(output, dtype=np.int32)
            s_c, s_q = lstm(s_c, m.w_yq(s_y) + m.w_qq(s_q))
        return hyp_batch
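# Usage sketch (assumption: the enclosing class drives this method from its own
# training / generation entry points; the optimizer name below is hypothetical
# and not part of the original code):
#
#     hyp_batch, accum_loss = self.__forward(True, src_batch, trg_batch)
#     accum_loss.backward()
#     optimizer.update()
#
#     hyp_batch = self.__forward(False, src_batch, generation_limit=64)
#
# Note the asymmetric return values: training returns (hyp_batch, accum_loss),
# generation returns hyp_batch only.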
def __forward(self, is_training, src_batch, trg_batch=None, generation_limit=None):
    m = self.__model
    tanh = functions.tanh
    lstm = functions.lstm
    batch_size = len(src_batch)
    hidden_size = self.__n_hidden
    src_len = len(src_batch[0])
    trg_len = len(trg_batch[0]) - 1 if is_training else generation_limit
    src_stoi = self.__src_vocab.stoi
    trg_stoi = self.__trg_vocab.stoi
    trg_itos = self.__trg_vocab.itos
    hidden_zeros = wrapper.zeros((batch_size, hidden_size))
    sum_e_zeros = wrapper.zeros((batch_size, 1))

    # make embedding: look up every source position once and reuse it in both
    # encoder directions
    list_x = []
    for l in range(src_len):
        s_x = wrapper.make_var(
            [src_stoi(src_batch[k][l]) for k in range(batch_size)], dtype=np.int32)
        list_x.append(s_x)

    # forward encoding: left-to-right annotations list_a
    c = hidden_zeros
    s_a = hidden_zeros
    list_a = []
    for l in range(src_len):
        s_x = list_x[l]
        s_i = tanh(m.w_xi(s_x))
        c, s_a = lstm(c, m.w_ia(s_i) + m.w_aa(s_a))
        list_a.append(s_a)

    # backward encoding: right-to-left annotations list_b
    c = hidden_zeros
    s_b = hidden_zeros
    list_b = []
    for l in reversed(range(src_len)):
        s_x = list_x[l]
        s_i = tanh(m.w_xi(s_x))
        c, s_b = lstm(c, m.w_ib(s_i) + m.w_bb(s_b))
        list_b.insert(0, s_b)

    # decoding: initialize the decoder state from both encoder directions
    c = hidden_zeros
    s_p = tanh(m.w_ap(list_a[-1]) + m.w_bp(list_b[0]))
    s_y = wrapper.make_var([trg_stoi('<s>') for k in range(batch_size)], dtype=np.int32)
    hyp_batch = [[] for _ in range(batch_size)]
    accum_loss = wrapper.zeros(()) if is_training else None

    #for n in range(src_len):
    #    print(src_batch[0][n], end=' ')
    #print()

    for l in range(trg_len):
        # calculate attention weights: r_e is the exponentiated (unnormalized)
        # score of source position n for the current decoder state s_p
        list_e = []
        sum_e = sum_e_zeros
        for n in range(src_len):
            s_w = tanh(m.w_aw(list_a[n]) + m.w_bw(list_b[n]) + m.w_pw(s_p))
            r_e = functions.exp(m.w_we(s_w))
            #list_e.append(functions.concat(r_e for _ in range(self.__n_hidden)))
            list_e.append(r_e)
            sum_e += r_e
        #sum_e = functions.concat(sum_e for _ in range(self.__n_hidden))

        # make attention vector: s_e is the normalized weight; batch_matmul
        # scales each annotation by its per-sentence scalar weight
        s_c = hidden_zeros
        s_d = hidden_zeros
        for n in range(src_len):
            s_e = list_e[n] / sum_e
            #s_c += s_e * list_a[n]
            #s_d += s_e * list_b[n]
            s_c += functions.reshape(
                functions.batch_matmul(list_a[n], s_e), (batch_size, hidden_size))
            s_d += functions.reshape(
                functions.batch_matmul(list_b[n], s_e), (batch_size, hidden_size))
            # (optional debug output: crude ASCII visualization of the weights)
            #zxcv = wrapper.get_data(s_e)[0][0]
            #if zxcv > 0.9: asdf='#'
            #elif zxcv > 0.7: asdf='*'
            #elif zxcv > 0.3: asdf='+'
            #elif zxcv > 0.1: asdf='.'
            #else: asdf=' '
            #print(asdf * len(src_batch[0][n]), end=' ')

        # generate next word from the previous word, the decoder state and the
        # two attention vectors
        c, s_p = lstm(
            c, m.w_yp(s_y) + m.w_pp(s_p) + m.w_cp(s_c) + m.w_dp(s_d))
        r_y = m.w_py(s_p)
        output = wrapper.get_data(r_y).argmax(1)
        for k in range(batch_size):
            hyp_batch[k].append(trg_itos(output[k]))
        #print(hyp_batch[0][-1])

        if is_training:
            # teacher forcing: feed the reference word and accumulate the loss
            s_t = wrapper.make_var(
                [trg_stoi(trg_batch[k][l + 1]) for k in range(batch_size)], dtype=np.int32)
            accum_loss += functions.softmax_cross_entropy(r_y, s_t)
            s_y = s_t
        else:
            # greedy generation: stop once every sentence has emitted '</s>'
            if all(hyp_batch[k][-1] == '</s>' for k in range(batch_size)):
                break
            s_y = wrapper.make_var(output, dtype=np.int32)

    return hyp_batch, accum_loss
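# A minimal NumPy sketch (not part of the original code; names and shapes are
# hypothetical) of what the attention loop above computes for a single decoder
# step: exponentiated scores are normalized by their sum, and the normalized
# weights form convex combinations of the forward and backward annotations.
import numpy as np

def attention_sketch(list_a, list_b, scores):
    # list_a, list_b: lists of (batch_size, hidden_size) annotation arrays
    # scores: (src_len, batch_size, 1) raw scores, i.e. w_we(s_w) before exp
    exp_scores = np.exp(scores)
    sum_e = exp_scores.sum(axis=0)            # (batch_size, 1)
    s_c = np.zeros_like(list_a[0])
    s_d = np.zeros_like(list_b[0])
    for n in range(len(list_a)):
        s_e = exp_scores[n] / sum_e           # weights sum to 1 over n
        s_c += s_e * list_a[n]                # weighted forward annotation
        s_d += s_e * list_b[n]                # weighted backward annotation
    return s_c, s_d

if __name__ == '__main__':
    rng = np.random.default_rng(0)
    batch_size, hidden_size, src_len = 2, 4, 3
    list_a = [rng.standard_normal((batch_size, hidden_size)) for _ in range(src_len)]
    list_b = [rng.standard_normal((batch_size, hidden_size)) for _ in range(src_len)]
    scores = rng.standard_normal((src_len, batch_size, 1))
    s_c, s_d = attention_sketch(list_a, list_b, scores)
    print(s_c.shape, s_d.shape)  # (2, 4) (2, 4)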