Example #1
 def test_pick_batch_elems(self):
     dy.renew_cg()
     x = dy.lookup_batch(self.p, [0, 1])
     y = dy.pick_batch_elems(x, [0])
     self.assertTrue(np.allclose(y.npvalue(), self.pval[0]))
     z = dy.pick_batch_elems(x, [0, 1])
     self.assertTrue(np.allclose(z.npvalue(), self.pval.T))
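The test above relies on a fixture defining self.p and self.pval. A minimal sketch of such a setUp, reusing the same 2x3 values as the standalone script in Example #11 (the class name and exact values are assumptions, not the project's actual fixture):

import unittest
import numpy as np
import dynet as dy

class TestBatchManipulation(unittest.TestCase):
    def setUp(self):
        # Hypothetical fixture: self.pval holds the rows of the lookup table
        # that the assertions above check against.
        self.m = dy.ParameterCollection()
        self.pval = np.asarray([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
        self.p = self.m.add_lookup_parameters((2, 3))
        self.p.init_from_array(self.pval)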
Example #3
 def calc_loss(self, policy_reward, only_final_reward=True):
   loss = losses.FactoredLossExpr()
   ## Calculate baseline
   pred_reward, baseline_loss = self.calc_baseline_loss(policy_reward, only_final_reward)
   if only_final_reward:
     rewards = [policy_reward - pw_i for pw_i in pred_reward]
   else:
     rewards = [pr_i - pw_i for pr_i, pw_i in zip(policy_reward, pred_reward)]
   loss.add_loss("rl_baseline", baseline_loss)
   ## Z-Normalization
   rewards = dy.concatenate(rewards, d=0)
   if self.z_normalization:
     rewards_value = rewards.value()
     rewards_mean = np.mean(rewards_value)
     rewards_std = np.std(rewards_value) + 1e-10
     rewards = (rewards - rewards_mean) / rewards_std
   ## Calculate Confidence Penalty
   if self.confidence_penalty:
     cp_loss = self.confidence_penalty.calc_loss(self.policy_lls)
     loss.add_loss("rl_confpen", cp_loss)
   ## Calculate Reinforce Loss
   reinf_loss = []
    # Loop over all actions in the sequence
   for i, (policy, action) in enumerate(zip(self.policy_lls, self.actions)):
     # Main Reinforce calculation
     reward = dy.pick(rewards, i)
     ll = dy.pick_batch(policy, action)
     if self.valid_pos is not None:
       ll = dy.pick_batch_elems(ll, self.valid_pos[i])
       reward = dy.pick_batch_elems(reward, self.valid_pos[i])
     reinf_loss.append(dy.sum_batches(ll * reward))
   loss.add_loss("rl_reinf", -self.weight * dy.esum(reinf_loss))
    ## Return the composed losses
   return loss
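In the loop above, self.valid_pos[i] is a list of batch indices that still hold real (non-padded) tokens at step i, so pick_batch_elems drops the padded batch entries before summing. A hypothetical helper, not part of the original code, showing how such lists could be built from a 0/1 mask (the mask layout is an assumption):

import numpy as np

def valid_positions(mask):
    # mask[i, b] == 1 iff batch element b has a real token at timestep i
    return [np.nonzero(mask[i])[0].tolist() for i in range(mask.shape[0])]

# e.g. mask = np.array([[1, 1, 1], [1, 1, 0]]) -> [[0, 1, 2], [0, 1]]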
Example #4
 def calc_baseline_loss(self, reward, only_final_reward):
   pred_rewards = []
   cur_losses = []
   for i, state in enumerate(self.states):
     pred_reward = self.baseline.transform(dy.nobackprop(state))
     pred_rewards.append(dy.nobackprop(pred_reward))
     seq_reward = reward if only_final_reward else reward[i]
     if self.valid_pos is not None:
       pred_reward = dy.pick_batch_elems(pred_reward, self.valid_pos[i])
       act_reward = dy.pick_batch_elems(seq_reward, self.valid_pos[i])
     else:
       act_reward = seq_reward
     cur_losses.append(dy.sum_batches(dy.squared_distance(pred_reward, dy.nobackprop(act_reward))))
   return pred_rewards, dy.esum(cur_losses)
Example #5
 def output_and_loss(self, h_block, concat_t_block):
   concat_logit_block = self.output_affine(h_block, reconstruct_shape=False)
   bool_array = concat_t_block != 0
   indexes = np.argwhere(bool_array).ravel()
   concat_logit_block = dy.pick_batch_elems(concat_logit_block, indexes)
   concat_t_block = concat_t_block[bool_array]
   loss = dy.pickneglogsoftmax_batch(concat_logit_block, concat_t_block)
   return loss
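A quick numeric view of the masking above, assuming padding targets are encoded as 0: only the non-padding batch positions are kept before the softmax loss.

import numpy as np

# Toy values for illustration
concat_t_block = np.array([3, 0, 7, 0, 2])
bool_array = concat_t_block != 0
indexes = np.argwhere(bool_array).ravel()   # array([0, 2, 4])
targets = concat_t_block[bool_array]        # array([3, 7, 2])
# dy.pick_batch_elems(concat_logit_block, indexes) keeps just those batch
# elements, so the loss is computed only over real target tokens.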
Example #6
 def split_batch(self, X, h):
   (n_rows, _), batch = X.dim()
     l = list(range(batch))
   steps = batch // h
   output = []
   for i in range(0, batch, steps):
     indexes = l[i:i + steps]
     output.append(dy.pick_batch_elems(X, indexes))
   return output
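As a rough usage sketch (the lookup table and sizes below are assumptions): with a batch of 6 and h = 3, steps = 2, so the method yields three expressions holding batch elements [0, 1], [2, 3] and [4, 5]; if batch is not divisible by h, the loop produces a trailing smaller chunk.

import dynet as dy

# Toy setup, not the original model
dy.renew_cg()
m = dy.ParameterCollection()
lp = m.add_lookup_parameters((6, 4))
X = dy.lookup_batch(lp, [0, 1, 2, 3, 4, 5])      # batch size 6
chunks = [dy.pick_batch_elems(X, list(range(i, i + 2))) for i in range(0, 6, 2)]
print([c.dim() for c in chunks])                 # three ((4,), 2) expressions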
Example #7
 def calc_baseline_loss(self, rewards):
      avg_rewards = dy.average(
          rewards)  # Take the average of the rewards across multiple samples
     pred_rewards = []
     loss = []
     for i, state in enumerate(self.states):
         pred_reward = self.baseline(dy.nobackprop(state))
         pred_rewards.append(dy.nobackprop(pred_reward))
         if self.valid_pos is not None:
             pred_reward = dy.pick_batch_elems(pred_reward,
                                               self.valid_pos[i])
             avg_reward = dy.pick_batch_elems(avg_rewards,
                                              self.valid_pos[i])
         else:
             avg_reward = avg_rewards
         loss.append(
             dy.sum_batches(dy.squared_distance(pred_reward, avg_reward)))
     return pred_rewards, dy.esum(loss)
Example #8
 def calc_loss(self, policy):
     if self.weight < 1e-8:
         return None
     neg_entropy = []
     for i, ll in enumerate(policy):
         if self.valid_pos is not None:
             ll = dy.pick_batch_elems(ll, self.valid_pos[i])
         loss = dy.sum_batches(dy.sum_elems(dy.cmult(dy.exp(ll), ll)))
         neg_entropy.append(dy.sum_batches(loss))
     return self.weight * dy.esum(neg_entropy)
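The term summed above is sum(exp(ll) * ll) = sum(p * log p), i.e. the negative entropy of the policy, which self.weight then scales into a penalty. A small numeric check (values chosen for illustration):

import numpy as np

p = np.array([0.5, 0.25, 0.25])
ll = np.log(p)                           # log-probabilities, as in the policy
neg_entropy = np.sum(np.exp(ll) * ll)    # = sum(p * log p) = -H(p)
print(neg_entropy)                       # about -1.0397 nats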
Example #9
 def calc_loss(self, rewards):
     loss = FactoredLossExpr()
     ## Z-Normalization
     if self.z_normalization:
         reward_batches = dy.concatenate_to_batch(rewards)
         mean_batches = dy.mean_batches(reward_batches)
         std_batches = dy.std_batches(reward_batches)
         rewards = [
             dy.cdiv(reward - mean_batches, std_batches)
             for reward in rewards
         ]
     ## Calculate baseline
     if self.baseline is not None:
         pred_reward, baseline_loss = self.calc_baseline_loss(rewards)
         loss.add_loss("rl_baseline", baseline_loss)
     ## Calculate Confidence Penalty
     if self.confidence_penalty:
         loss.add_loss("rl_confpen",
                       self.confidence_penalty.calc_loss(self.policy_lls))
     ## Calculate Reinforce Loss
     reinf_loss = []
      # Loop over all actions in the sequence
      for i, (policy, action_sample) in enumerate(zip(self.policy_lls, self.actions)):
          # Discount the reward if we use a baseline
         if self.baseline is not None:
             rewards = [reward - pred_reward[i] for reward in rewards]
         # Main Reinforce calculation
         sample_loss = []
         for action, reward in zip(action_sample, rewards):
             ll = dy.pick_batch(policy, action)
             if self.valid_pos is not None:
                 ll = dy.pick_batch_elems(ll, self.valid_pos[i])
                 reward = dy.pick_batch_elems(reward, self.valid_pos[i])
             sample_loss.append(dy.sum_batches(ll * reward))
          # Take the average of the losses across multiple samples
         reinf_loss.append(dy.esum(sample_loss) / len(sample_loss))
     loss.add_loss("rl_reinf", self.weight * -dy.esum(reinf_loss))
      ## Return the composed losses
     return loss
Example #10
  def calc_loss(self, policy_reward, results={}):
    """
    Calc policy networks loss.
    """
    assert len(policy_reward) == len(self.states), "There should be a reward for every action taken"
    batch_size = self.states[0].dim()[1]
    loss = {}

    # Calculate the baseline for the reinforce reward at each timestep:
    # b = W_b * s + b_b
    # R = r - b
    # Also calculate the baseline loss itself:
    # b = r_p (predicted)
    # loss_b = squared_distance(r_p, r_r)
    rewards = []
    baseline_loss = []
    units = np.zeros(batch_size)
    for i, state in enumerate(self.states):
      r_p = self.baseline.transform(dy.nobackprop(state))
      rewards.append(policy_reward[i] - r_p)
      if self.valid_pos[i] is not None:
        r_p = dy.pick_batch_elems(r_p, self.valid_pos[i])
        r_r = dy.pick_batch_elems(policy_reward[i], self.valid_pos[i])
        units[self.valid_pos[i]] += 1
      else:
        r_r = policy_reward[i]
        units += 1
      baseline_loss.append(dy.sum_batches(dy.squared_distance(r_p, r_r)))
    loss["rl_baseline"] = losses.LossExpr(dy.esum(baseline_loss), units)

    # Z Normalization
    # R = (R - mean(R)) / std(R)
    rewards = dy.concatenate(rewards, d=0)
    r_dim = rewards.dim()
    if self.z_normalization:
      rewards_shape = dy.reshape(rewards, (r_dim[0][0], r_dim[1]))
      rewards_mean = dy.mean_elems(rewards_shape)
      rewards_std = dy.std_elems(rewards_shape) + 1e-20
      rewards = (rewards - rewards_mean.value()) / rewards_std.value()
    rewards = dy.nobackprop(rewards)
    # Calculate Confidence Penalty
    if self.confidence_penalty:
      loss["rl_confpen"] = self.confidence_penalty.calc_loss(self.policy_lls)

    # Calculate Reinforce Loss
    # L = - sum([R-b] * pi_ll)
    reinf_loss = []
    units = np.zeros(batch_size)
    for i, (policy, action) in enumerate(zip(self.policy_lls, self.actions)):
      reward = dy.pick(rewards, i)
      ll = dy.pick_batch(policy, action)
      if self.valid_pos[i] is not None:
        ll = dy.pick_batch_elems(ll, self.valid_pos[i])
        reward = dy.pick_batch_elems(reward, self.valid_pos[i])
        units[self.valid_pos[i]] += 1
      else:
        units += 1
      reinf_loss.append(dy.sum_batches(dy.cmult(ll, reward)))
    loss["rl_reinf"] = losses.LossExpr(-dy.esum(reinf_loss), units)

    # Pack up + return
    return losses.FactoredLossExpr(loss)
Example #11
import dynet as dy
import numpy as np

m = dy.Model()
p = m.add_lookup_parameters((2, 3))
npp = np.asarray([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
p.init_from_array(npp)
dy.renew_cg()

x = dy.lookup_batch(p, [0, 1])
y = dy.pick_batch_elems(x, [0])
z = dy.pick_batch_elem(x, 1)
yz = dy.pick_batch_elems(x, [0, 1])
w = dy.concatenate_to_batch([y, z])

print(x.npvalue())
print(y.npvalue())
print(yz.npvalue())
print(w.npvalue())

loss = dy.dot_product(y, z)
loss.forward()

loss.backward()

print(p.grad_as_array())
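For reference, the values the script should print, worked out by hand from npp (the batched layout follows the assertions in Example #1, where the full batch matches pval.T):

# x.npvalue()  -> [[1. 4.]
#                  [2. 5.]
#                  [3. 6.]]           two batch elements, batch-last
# y.npvalue()  -> [1. 2. 3.]          batch element 0 only
# yz.npvalue() -> same as x           both elements picked
# w.npvalue()  -> same as x           y and z concatenated back into a batch
# loss = dot(y, z) = 1*4 + 2*5 + 3*6 = 32
# p.grad_as_array() -> [[4. 5. 6.]
#                       [1. 2. 3.]]   d loss / d row0 = z, d loss / d row1 = y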
Example #12
 def cached_embedding_lookup(self, toks):
     chunks = map(tuple, zip(*toks))
     cache_is = [self.cache_locs[chunk] for chunk in chunks]
     return dynet.pick_batch_elems(self.cached_embeddings, cache_is)
Example #13
    def run(self, words, tags, heads, rels, masks_w, masks_t, isTrain):
        if config.biaffine:
            mlp_dep_bias = dy.parameter(self.mlp_dep_bias)
            mlp_dep = dy.parameter(self.mlp_dep)
            mlp_head_bias = dy.parameter(self.mlp_head_bias)
            mlp_head = dy.parameter(self.mlp_head)
            W_arc = dy.parameter(self.W_arc)
            W_rel = dy.parameter(self.W_rel)

        # tokens in the sentence and root
        seq_len = len(words) + 1

        punct_mask = np.array(
            [1 if rel != self._punct_id else 0 for rel in rels],
            dtype=np.uint32)

        preds_arc = []
        preds_rel = []

        loss_arc = 0
        loss_rel = 0

        num_cor_arc = 0
        num_cor_rel = 0

        if isTrain:
            # embs_w = [self.lp_w[w if w < self._vocab_size_w else 0] * mask_w for w, mask_w in zip(words, masks_w)]
            # embs_t = [self.lp_t[t if t < self._vocab_size_t else 0] * mask_t for t, mask_t in zip(tags, masks_t)]
            embs_w = [
                self.lp_w[w] * mask_w for w, mask_w in zip(words, masks_w)
            ]
            embs_t = [
                self.lp_t[t] * mask_t for t, mask_t in zip(tags, masks_t)
            ]
            embs_w = [self.emb_root[0] * masks_t[-1]] + embs_w
            embs_t = [self.emb_root[1] * masks_w[-1]] + embs_t

        else:
            # embs_w = [self.lp_w[w if w < self._vocab_size_w else 0] for w in words]
            # embs_t = [self.lp_t[t if t < self._vocab_size_t else 0] for t in tags]
            embs_w = [self.lp_w[w] for w in words]
            embs_t = [self.lp_t[t] for t in tags]
            embs_w = [self.emb_root[0]] + embs_w
            embs_t = [self.emb_root[1]] + embs_t

        lstm_ins = [
            dy.concatenate([emb_w, emb_t])
            for emb_w, emb_t in zip(embs_w, embs_t)
        ]
        # lstm_outs = dy.concatenate_cols([self.emb_root[0]] + utils.bilstm(self.l2r_lstm, self.r2l_lstm, lstm_ins, self._pdrop))
        # lstm_outs = dy.concatenate_cols(utils.bilstm(self.LSTM_builders[0], self.LSTM_builders[1], lstm_ins, self._pdrop_lstm))
        lstm_outs = dy.concatenate_cols(
            utils.biLSTM(self.LSTM_builders, lstm_ins, None, self._pdrop_lstm,
                         self._pdrop_lstm))

        # if isTrain:
        #     lstm_outs = dy.dropout(lstm_outs, self._pdrop)

        if config.biaffine:
            embs_dep, embs_head = \
                utils.leaky_relu(dy.affine_transform([mlp_dep_bias, mlp_dep, lstm_outs])), \
                utils.leaky_relu(dy.affine_transform([mlp_head_bias, mlp_head, lstm_outs]))

            if isTrain:
                embs_dep, embs_head = (dy.dropout(embs_dep, self._pdrop_mlp),
                                       dy.dropout(embs_head, self._pdrop_mlp))

            dep_arc, dep_rel = embs_dep[:self._arc_dim], embs_dep[self._arc_dim:]
            head_arc, head_rel = embs_head[:self._arc_dim], embs_head[self._arc_dim:]

            logits_arc = utils.bilinear(dep_arc, W_arc, head_arc,
                                        self._arc_dim, seq_len,
                                        config.batch_size, 1,
                                        self.biaffine_bias_x_arc,
                                        self.biaffine_bias_y_arc)
        else:
            mlp = dy.parameter(self.mlp)
            mlp_bias = dy.parameter(self.mlp_bias)

            embs = \
                utils.leaky_relu(dy.affine_transform([mlp_bias, mlp, lstm_outs]))
            if isTrain:
                embs = dy.dropout(embs, self._pdrop_mlp)

            embs_arc, embs_rel = embs[:self._arc_dim * 2], embs[self._arc_dim * 2:]

            W_r_arc = dy.parameter(self.V_r_arc)
            W_i_arc = dy.parameter(self.V_i_arc)
            bias_arc = dy.parameter(self.bias_arc)

            logits_arc = utils.biED(embs_arc,
                                    W_r_arc,
                                    W_i_arc,
                                    embs_arc,
                                    seq_len,
                                    1,
                                    bias=bias_arc)

        # flat_logits_arc = dy.reshape(logits_arc[:][1:], (seq_len,), seq_len - 1)
        flat_logits_arc = dy.reshape(logits_arc, (seq_len, ), seq_len)
        # flat_logits_arc = dy.pick_batch_elems(flat_logits_arc, [e for e in range(1, seq_len)])
        flat_logits_arc = dy.pick_batch_elems(
            flat_logits_arc, np.arange(1, seq_len, dtype='int32'))

        loss_arc = dy.pickneglogsoftmax_batch(flat_logits_arc, heads)

        if not isTrain:
            # msk = [1] * seq_len
            msk = np.ones((seq_len), dtype='int32')
            arc_probs = dy.softmax(logits_arc).npvalue()
            arc_probs = np.transpose(arc_probs)
            preds_arc = utils.arc_argmax(arc_probs,
                                         seq_len,
                                         msk,
                                         ensure_tree=True)

            # preds_arc = logits_arc.npvalue().argmax(0)
            cor_arcs = np.multiply(np.equal(preds_arc[1:], heads), punct_mask)
            num_cor_arc = np.sum(cor_arcs)

        if not config.las:
            return loss_arc, num_cor_arc, num_cor_rel

        if config.biaffine:
            logits_rel = utils.bilinear(dep_rel, W_rel, head_rel,
                                        self._rel_dim, seq_len, 1,
                                        self._vocab_size_r,
                                        self.biaffine_bias_x_rel,
                                        self.biaffine_bias_y_rel)
        else:
            V_r_rel = dy.parameter(self.V_r_rel)
            V_i_rel = dy.parameter(self.V_i_rel)
            bias_rel = dy.parameter(self.bias_rel)

            logits_rel = utils.biED(embs_rel,
                                    V_r_rel,
                                    V_i_rel,
                                    embs_rel,
                                    seq_len,
                                    self._vocab_size_r,
                                    bias=bias_rel)

        # flat_logits_rel = dy.reshape(logits_rel[:][1:], (seq_len, self._vocab_size_r), seq_len - 1)
        flat_logits_rel = dy.reshape(logits_rel, (seq_len, self._vocab_size_r),
                                     seq_len)
        # flat_logits_rel = dy.pick_batch_elems(flat_logits_rel, [e for e in range(1, seq_len)])
        flat_logits_rel = dy.pick_batch_elems(
            flat_logits_rel, np.arange(1, seq_len, dtype='int32'))

        partial_rel_logits = dy.pick_batch(flat_logits_rel,
                                           heads if isTrain else preds_arc[1:])

        if isTrain:
            loss_rel = dy.sum_batches(
                dy.pickneglogsoftmax_batch(partial_rel_logits, rels))
        else:
            preds_rel = partial_rel_logits.npvalue().argmax(0)
            num_cor_rel = np.sum(
                np.multiply(np.equal(preds_rel, rels), cor_arcs))
        return loss_arc + loss_rel, num_cor_arc, num_cor_rel
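The reshape/pick_batch_elems pattern used twice above reinterprets the score tensor so that token positions sit on the batch axis, then drops position 0 (the root) before the batched softmax loss. A minimal standalone sketch of the same shape manipulation (toy sizes, not the parser's real tensors):

import numpy as np
import dynet as dy

dy.renew_cg()
seq_len = 4
scores = dy.inputTensor(np.arange(seq_len * seq_len, dtype=np.float32).reshape(seq_len, seq_len))
flat = dy.reshape(scores, (seq_len,), batch_size=seq_len)               # batch axis = token position
kept = dy.pick_batch_elems(flat, np.arange(1, seq_len, dtype='int32'))  # drop the root position
print(kept.dim())   # ((4,), 3): one seq_len-long score vector per non-root token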