def update_batch(self, words_batch, tags_batch):
    dynet.renew_cg()
    length = max(len(words) for words in words_batch)
    # Build (length x batch) index matrices; positions past a sentence's end keep index 0.
    word_ids = np.zeros((length, len(words_batch)), dtype='int32')
    for j, words in enumerate(words_batch):
        for i, word in enumerate(words):
            word_ids[i, j] = self.vw.w2i.get(word, self.UNK)
    tag_ids = np.zeros((length, len(words_batch)), dtype='int32')
    for j, tags in enumerate(tags_batch):
        for i, tag in enumerate(tags):
            tag_ids[i, j] = self.vt.w2i.get(tag, self.UNK)
    # One batched embedding lookup per time step, with Gaussian noise as regularization.
    wembs = [dynet.lookup_batch(self._E, word_ids[i]) for i in range(length)]
    wembs = [dynet.noise(we, 0.1) for we in wembs]
    # Run the forward and backward LSTMs over the embedded batch.
    f_state = self._fwd_lstm.initial_state()
    b_state = self._bwd_lstm.initial_state()
    fw = [x.output() for x in f_state.add_inputs(wembs)]
    bw = [x.output() for x in b_state.add_inputs(reversed(wembs))]
    H = dynet.parameter(self._pH)
    O = dynet.parameter(self._pO)
    errs = []
    for i, (f, b) in enumerate(zip(fw, reversed(bw))):
        f_b = dynet.concatenate([f, b])
        r_t = O * (dynet.tanh(H * f_b))
        # Per-timestep negative log-likelihood, summed over the batch dimension.
        err = dynet.pickneglogsoftmax_batch(r_t, tag_ids[i])
        errs.append(dynet.sum_batches(err))
    sum_errs = dynet.esum(errs)
    losses = sum_errs.scalar_value()
    sum_errs.backward()
    self._sgd.update()
    return losses
def predict_batch(self, words_batch):
    dynet.renew_cg()
    length = max(len(words) for words in words_batch)
    word_ids = np.zeros((length, len(words_batch)), dtype='int32')
    for j, words in enumerate(words_batch):
        for i, word in enumerate(words):
            word_ids[i, j] = self.vw.w2i.get(word, self.UNK)
    wembs = [dynet.lookup_batch(self._E, word_ids[i]) for i in range(length)]
    f_state = self._fwd_lstm.initial_state()
    b_state = self._bwd_lstm.initial_state()
    fw = [x.output() for x in f_state.add_inputs(wembs)]
    bw = [x.output() for x in b_state.add_inputs(reversed(wembs))]
    H = dynet.parameter(self._pH)
    O = dynet.parameter(self._pO)
    tags_batch = [[] for _ in range(len(words_batch))]
    for i, (f, b) in enumerate(zip(fw, reversed(bw))):
        r_t = O * (dynet.tanh(H * dynet.concatenate([f, b])))
        # npvalue() of a batched expression yields one column per batch element.
        out = dynet.softmax(r_t).npvalue()
        for j in range(len(words_batch)):
            tags_batch[j].append(self.vt.i2w[np.argmax(out.T[j])])
    return tags_batch
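A minimal sketch of how these batched methods might be driven, assuming a `tagger` instance of the class above and `train`/`dev` as lists of (words, tags) pairs; the batch size, epoch count, and length-sorting are illustrative choices, not taken from the original code.

# Hypothetical driver loop for update_batch / predict_batch
# (`tagger`, `train`, `dev`, and BATCH_SIZE are assumed names).
BATCH_SIZE = 32
train.sort(key=lambda pair: len(pair[0]))  # group similar lengths to limit padding
for epoch in range(5):
    total_loss = 0.0
    for start in range(0, len(train), BATCH_SIZE):
        batch = train[start:start + BATCH_SIZE]
        words_batch = [words for words, tags in batch]
        tags_batch = [tags for words, tags in batch]
        total_loss += tagger.update_batch(words_batch, tags_batch)
    predicted = tagger.predict_batch([words for words, _ in dev[:BATCH_SIZE]])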
def attend(self, input_mat, state, w1dt, input_len, batch_size):
    global attention_w2
    global attention_v
    w2 = dy.parameter(attention_w2)
    v = dy.parameter(attention_v)
    w2dt = w2 * dy.concatenate(list(state.s()))
    unnormalized = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt)))
    unnormalized = dy.reshape(unnormalized, (input_len,), batch_size)
    att_weights = dy.softmax(unnormalized)
    context = input_mat * att_weights
    return context, att_weights
def attend_batch(input_mat, state, w1dt, batch_size, input_length):
    global attention_w2
    global attention_v
    w2 = dy.parameter(attention_w2)
    v = dy.parameter(attention_v)
    w2dt = w2 * dy.concatenate(list(state.s()))
    unnormalized = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt)))
    attention_reshaped = dy.reshape(unnormalized, (input_length,), batch_size)
    att_weights = dy.softmax(attention_reshaped)
    context = input_mat * att_weights
    return context
def attend(input_mat, state, w1dt):
    global attention_w2
    global attention_v
    w2 = dy.parameter(attention_w2)
    v = dy.parameter(attention_v)
    # input_mat: (encoder_state x seqlen) => input vecs concatenated as cols
    # w1dt: (attdim x seqlen)
    # w2dt: (attdim,) vector, broadcast across the seqlen columns by colwise_add
    w2dt = w2 * dy.concatenate(list(state.s()))
    # att_weights: (seqlen,) row vector
    unnormalized = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt)))
    att_weights = dy.softmax(unnormalized)
    # context: (encoder_state) weighted sum of the input columns
    context = input_mat * att_weights
    return context
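All of the `attend` variants above expect `input_mat` and `w1dt` to be precomputed once per input sentence. A sketch of that precomputation, assuming `encoded` is the list of encoder output vectors, `attention_w1` is the first attention projection (a companion to the `attention_w2`/`attention_v` globals above), and `dec_state` is the current decoder RNN state; these names are assumptions for illustration.

# Assumed per-sentence precomputation before calling attend()
w1 = dy.parameter(attention_w1)
input_mat = dy.concatenate_cols(encoded)   # (encoder_state x seqlen)
w1dt = w1 * input_mat                      # (attdim x seqlen), reused at every decoder step
context = attend(input_mat, dec_state, w1dt)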
def __call__(self, words):
    dynet.renew_cg()
    word_ids = [self.vw.w2i.get(w, self.UNK) for w in words]
    wembs = [self._E[w] for w in word_ids]
    f_state = self._fwd_lstm.initial_state()
    b_state = self._bwd_lstm.initial_state()
    fw = [x.output() for x in f_state.add_inputs(wembs)]
    bw = [x.output() for x in b_state.add_inputs(reversed(wembs))]
    H = dynet.parameter(self._pH)
    O = dynet.parameter(self._pO)
    tags = []
    for i, (f, b) in enumerate(zip(fw, reversed(bw))):
        r_t = O * (dynet.tanh(H * dynet.concatenate([f, b])))
        out = dynet.softmax(r_t)
        tags.append(self.vt.i2w[np.argmax(out.npvalue())])
    return tags
def predict_emb(self, chars):
    dy.renew_cg()
    finit = self.char_fwd_lstm.initial_state()
    binit = self.char_bwd_lstm.initial_state()
    H = dy.parameter(self.lstm_to_rep_params)
    Hb = dy.parameter(self.lstm_to_rep_bias)
    O = dy.parameter(self.mlp_out)
    Ob = dy.parameter(self.mlp_out_bias)
    # Surround the character-id sequence with padding symbols, then embed it.
    pad_char = self.c2i[PADDING_CHAR]
    char_ids = [pad_char] + chars + [pad_char]
    embeddings = [self.char_lookup[cid] for cid in char_ids]
    # Run the character LSTMs in both directions and keep only the final outputs.
    bi_fwd_out = finit.transduce(embeddings)
    bi_bwd_out = binit.transduce(reversed(embeddings))
    rep = dy.concatenate([bi_fwd_out[-1], bi_bwd_out[-1]])
    # A one-hidden-layer MLP maps the biLSTM summary into the word-embedding space.
    return O * dy.tanh(H * rep + Hb) + Ob
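`predict_emb` consumes character ids, not raw characters. A hedged usage sketch, assuming `model` is an instance of this class; skipping characters missing from `c2i` is an illustrative choice, not necessarily how the original handles them.

# Hypothetical call site: map a word's characters to ids via the model's
# c2i vocabulary before calling predict_emb (`model` is an assumed name).
word = "photosynthesis"
char_ids = [model.c2i[c] for c in word if c in model.c2i]
embedding = model.predict_emb(char_ids)  # dynet expression for the predicted word vector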
def _mlp(self, W1_att_f, W1_att_e, w2_att, h_fs_matrix, h_e, F):
    # MLP attention scores: project h_e, add it column-wise to the projected
    # source states in h_fs_matrix, and score each column with w2_att.
    # (The argument F is unused in this variant.)
    E = W1_att_e * h_e
    a = dy.colwise_add(W1_att_f * h_fs_matrix, E)
    res = dy.transpose(dy.tanh(a)) * w2_att
    return res
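`_mlp` returns one unnormalized score per source position. A caller would typically normalize the scores and form a context vector; a minimal sketch, assuming the columns of `h_fs_matrix` are the encoder states and reusing the same argument names.

# Hypothetical follow-up to _mlp: softmax the scores into attention weights
# and take the weighted sum of encoder states as the context vector.
scores = self._mlp(W1_att_f, W1_att_e, w2_att, h_fs_matrix, h_e, F)
att_weights = dy.softmax(scores)
context = h_fs_matrix * att_weights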