Example No. 1
 def encode(input_, lengths):
     forward, backward = lstm(input_)
     states = dy.concatenate_cols(forward)
     final_states_forward = dy.pick_batch(states, lengths, dim=1)
     states = dy.concatenate_cols(backward)
     final_states_backward = dy.pick_batch(states, lengths, dim=1)
     return dy.concatenate([final_states_forward, final_states_backward])
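A minimal standalone sketch (toy sizes; `dy.concatenate_to_batch` is assumed from the DyNet API) of what the `dim=1` pick above does: it selects one column of the state matrix per batch element, e.g. the final valid time step of each sequence.

import dynet as dy
import numpy as np

dy.renew_cg()
# Two (3 x 4) "state" matrices stacked into a batch of 2; columns stand in
# for per-time-step LSTM states, as in the encoder above.
m1 = dy.inputTensor(np.arange(12.0).reshape(3, 4))
m2 = dy.inputTensor(np.arange(12.0, 24.0).reshape(3, 4))
states = dy.concatenate_to_batch([m1, m2])
# lengths = [1, 3]: column 1 for batch item 0, column 3 for batch item 1.
final = dy.pick_batch(states, [1, 3], dim=1)
print(final.dim())  # ((3,), 2)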
Example No. 2
 def generate_output(self,
                     translator,
                     initial_state,
                     src_length=None,
                     forced_trg_ids=None):
     # Output variables
     score = []
     word_ids = []
     attentions = []
     logsoftmaxes = []
     states = []
     masks = []
     # Search Variables
     done = None
     current_state = initial_state
     for length in range(self.max_len):
         prev_word = word_ids[length - 1] if length > 0 else None
         current_output = translator.generate_one_step(
             prev_word, current_state)
         current_state = current_output.state
         if forced_trg_ids is None:
             word_id = np.argmax(current_output.logsoftmax.npvalue(),
                                 axis=0)
             if len(word_id.shape) == 2:
                 word_id = word_id[0]
         else:
             if batchers.is_batched(forced_trg_ids):
                 word_id = [
                     forced_trg_ids[i][length]
                     for i in range(len(forced_trg_ids))
                 ]
             else:
                 word_id = [forced_trg_ids[length]]
         logsoft = dy.pick_batch(current_output.logsoftmax, word_id)
         if done is not None:
             word_id = [
                 word_id[i] if not done[i] else Vocab.ES
                 for i in range(len(done))
             ]
             # masking for logsoftmax
             mask = [1 if not done[i] else 0 for i in range(len(done))]
             logsoft = dy.cmult(logsoft, dy.inputTensor(mask, batched=True))
             masks.append(mask)
         # Packing outputs
         score.append(logsoft.npvalue())
         word_ids.append(word_id)
         attentions.append(current_output.attention)
         logsoftmaxes.append(
             dy.pick_batch(current_output.logsoftmax, word_id))
         states.append(translator.get_nobp_state(current_state))
         # Check if we are done.
         done = [x == Vocab.ES for x in word_id]
         if all(done):
             break
     masks.insert(0, [1 for _ in range(len(done))])
     words = np.stack(word_ids, axis=1)
     score = np.sum(score, axis=0)
     return [
         SearchOutput(words, attentions, score, logsoftmaxes, states, masks)
     ]
Example No. 3
 def predict_chunks_by_tokens(self, w_t, chunk_batch):
     ender = [self.lattice_vocab.chunk_end.i] * self.BATCH_SIZE
     lps = []
     state = self.lattice_rnn.initial_state(dropout=self.DROPOUT)
     cs = [[self.lattice_vocab.chunk_start.i] * self.BATCH_SIZE] + chunk_batch
     cum_lp = dynet.scalarInput(0.0, device=self.args.param_device)
     for i, (cc, nc) in enumerate(zip(cs, cs[1:])):
         if self.args.concat_context_vector:
             x_t = dynet.pick_batch(self.vocab_R, cc)
             state.add_input(x_t)
         else:
             if i == 0:
                 state.add_input(self.project_main_to_lattice_init_R * w_t)
             else:
                 x_t = dynet.pick_batch(self.vocab_R, cc)
                 state.add_input(x_t)
         y_t = state.output()
         y_t = dynet.to_device(y_t, self.args.param_device)
         if self.DROPOUT:
             y_t = dynet.cmult(y_t, self.dropout_mask_lattice_y_t)
         if self.args.concat_context_vector:
             y_t = dynet.concatenate([y_t, w_t])
         r_t = dynet.affine_transform([
             self.vocab_bias, self.vocab_R,
             dynet.tanh(
                 dynet.affine_transform(
                     [self.lattice_bias, self.lattice_R, y_t]))
         ])
         if i > 0:
             lps.append(cum_lp + -dynet.pickneglogsoftmax_batch(r_t, ender))
         cum_lp = cum_lp + -dynet.pickneglogsoftmax_batch(r_t, nc)
     lps.append(cum_lp)
     return lps
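The loop above accumulates `-dynet.pickneglogsoftmax_batch(...)` terms as cumulative log probabilities. A standalone sketch (made-up scores) of the identity this relies on: picking from a log-softmax equals the negated batched NLL.

import dynet as dy

dy.renew_cg()
# A batch of two 3-dim score vectors (the last dimension is the batch).
scores = dy.inputTensor([[1.0, -0.5], [0.2, 0.3], [-1.0, 2.0]], batched=True)
ids = [0, 2]
a = dy.pick_batch(dy.log_softmax(scores), ids)
b = -dy.pickneglogsoftmax_batch(scores, ids)
print(a.npvalue(), b.npvalue())  # identical log probabilities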
Example No. 4
	def cross_entropy_loss(self, score, next_word, cur_word):
		if self.__ls:
			log_prob = dy.log_softmax(score)
			if self.__lm is None:
				loss = - dy.pick_batch(log_prob, next_word) * (1 - self.__ls_eps) - \
					dy.mean_elems(log_prob) * self.__ls_eps
			else:
				loss = - dy.pick_batch(log_prob, next_word) * (1 - self.__ls_eps) - \
					dy.dot_product(self.__lm.next_expr(cur_word), log_prob) * self.__ls_eps
		else:
			loss = dy.pickneglogsoftmax_batch(score, next_word)
		return loss
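A quick standalone check (made-up scores) that the smoothed loss above reduces to the plain batched NLL when the smoothing weight is zero:

import dynet as dy

dy.renew_cg()
scores = dy.inputTensor([[1.0, 0.0], [0.5, 2.0], [0.1, -1.0]], batched=True)
next_word = [2, 0]
eps = 0.0
log_prob = dy.log_softmax(scores)
smoothed = -dy.pick_batch(log_prob, next_word) * (1 - eps) \
           - dy.mean_elems(log_prob) * eps
plain = dy.pickneglogsoftmax_batch(scores, next_word)
print(smoothed.npvalue(), plain.npvalue())  # agree when eps == 0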
Example No. 5
    def predict_logprobs(self, X, Y):
        """
        Returns the log probabilities of the predictions for this model (batched version).
        Returns a matrix of log probabilities.
        @param X: the input indexes from which to predict
        @param Y: a list of reference indexes for which to extract the prob.
        @return the matrix of predicted log probabilities for each of the provided ref y in Y,
        as a numpy array
        """
        assert (len(X) == len(Y))
        assert (all([len(x) == len(y) for x, y in zip(X, Y)]))

        nlines = len(X)
        X = zip(*X)  #transposes the batch
        Y = zip(*Y)  #transposes the batch
        if self.tied:
            dy.renew_cg()
            state = self.rnn.initial_state()
            E = dy.parameter(self.embedding_matrix)
            preds = []
            lookups = [dy.pick_batch(E, xcolumn) for xcolumn in X]
            outputs = state.transduce(lookups)
            ypred_batch = [
                dy.pickneglogsoftmax_batch(E * lstm_out, y)
                for lstm_out, y in zip(outputs, Y)
            ]
            dy.forward(ypred_batch)
            if nlines > 1:
                preds = [(-col.npvalue()).tolist()[0] for col in ypred_batch]
            else:
                preds = [(-col.npvalue()).tolist() for col in ypred_batch]
            return list(zip(*preds))  #final back transposition
        else:
            dy.renew_cg()
            state = self.rnn.initial_state()
            O = dy.parameter(self.output_weights)
            E = dy.parameter(self.embedding_matrix)
            preds = []
            lookups = [dy.pick_batch(E, xcolumn) for xcolumn in X]
            outputs = state.transduce(lookups)
            ypred_batch = [
                dy.pickneglogsoftmax_batch(O * lstm_out, y)
                for lstm_out, y in zip(outputs, Y)
            ]
            dy.forward(ypred_batch)
            if nlines > 1:
                preds = [(-col.npvalue()).tolist()[0] for col in ypred_batch]
            else:
                preds = [(-col.npvalue()).tolist() for col in ypred_batch]
            return list(zip(*preds))  #final back transposition
Example No. 6
 def cross_entropy_loss(self, s, nw, cw):
     """Calculates the cross-entropy
     """
     if self.ls:
         log_prob = dy.log_softmax(s)
         if self.lm is None:
             loss = - dy.pick_batch(log_prob, nw) * (1 - self.ls_eps) - \
                 dy.mean_elems(log_prob) * self.ls_eps
         else:
             loss = - dy.pick_batch(log_prob, nw) * (1 - self.ls_eps) - \
                 dy.dot_product(self.lm_e, log_prob) * self.ls_eps
     else:
         loss = dy.pickneglogsoftmax_batch(s, nw)
     return loss
Example No. 7
 def compress_chunk(self, chunks, masks=None):
     compression_batch_size = len(chunks[0])
     # token_embeddings = [dynet.reshape(dynet.select_cols(self.vocab_lookup, tokens), (self.args.dim,), compression_batch_size)
     # token_embeddings = [dynet.reshape(dynet.transpose(dynet.select_rows(self.vocab_R, tokens)), (self.args.dim,), compression_batch_size)
     token_embeddings = [
         dynet.pick_batch(self.vocab_R, tokens) for tokens in chunks
     ]
     fwd_state = self.lattice_fwd_comp_rnn.initial_state(
         mb_size=compression_batch_size, dropout=self.DROPOUT)
     bwd_state = self.lattice_bwd_comp_rnn.initial_state(
         mb_size=compression_batch_size, dropout=self.DROPOUT)
     if masks is None:
         fwd_emb = fwd_state.transduce(token_embeddings)[-1]
         bwd_emb = bwd_state.transduce(list(reversed(token_embeddings)))[-1]
     else:
         masks = [
             dynet.inputTensor(
                 mask, batched=True, device=self.args.param_device)
             if min(mask) == 0 else None for mask in masks
         ]
         fwd_emb = fwd_state.transduce(token_embeddings, masks)[-1]
         bwd_emb = bwd_state.transduce(reversed(token_embeddings),
                                       reversed(masks))[-1]
     emb = dynet.concatenate([fwd_emb, bwd_emb])
     emb = dynet.to_device(emb, self.args.param_device)
     return emb
Example No. 8
 def get_chunk_embedding(self, chunks, masks=None):
     if masks is None:
         merged_chunks = [
             self.lattice_vocab.pp(chunk)
             for chunk in map(list, zip(*chunks))
         ]
     else:
         merged_chunks = [
             self.lattice_vocab.masked_pp(chunk, mask)
             for chunk, mask in zip(map(list, zip(
                 *chunks)), map(list, zip(*masks)))
         ]
     chunk_emb_is = [
         self.chunk_vocab[chunk].i if chunk in self.chunk_vocab.strings else
         self.chunk_vocab['<chunk_unk>'].i for chunk in merged_chunks
     ]
     # fixed_embs = dynet.reshape(dynet.transpose(dynet.select_rows(self.chunk_vocab_R, chunk_emb_is)), (self.args.dim,), len(chunk_emb_is))
     # fixed_embs = dynet.reshape(dynet.select_cols(self.chunk_vocab_lookup, chunk_emb_is), (self.args.dim,), len(chunk_emb_is))
     fixed_embs = dynet.pick_batch(self.chunk_vocab_R, chunk_emb_is)
     if self.args.no_dynamic_embs:
         return fixed_embs
     else:
         dynamic_embs = self.compress_chunk(chunks, masks)
         full_embs = dynet.concatenate([fixed_embs, dynamic_embs])
         return full_embs
Example No. 9
 def on_calc_additional_loss(self, reward):
     if not self.learn_segmentation:
         return None
     ret = LossBuilder()
     if self.length_prior_alpha > 0:
         reward += self.segment_length_prior * self.length_prior_alpha
     reward = dy.cdiv(reward - dy.mean_batches(reward),
                      dy.std_batches(reward))
     # Baseline Loss
     if self.use_baseline:
         baseline_loss = []
         for i, baseline in enumerate(self.bs):
             baseline_loss.append(dy.squared_distance(reward, baseline))
         ret.add_loss("Baseline", dy.esum(baseline_loss))
     # Reinforce Loss
     lmbd = self.lmbd.get_value(self.warmup_counter)
     if lmbd > 0.0:
         reinforce_loss = []
         # Calculating the loss of the baseline and reinforce
         for i in range(len(self.segment_decisions)):
             ll = dy.pick_batch(self.segment_logsoftmaxes[i],
                                self.segment_decisions[i])
             if self.use_baseline:
                 r_i = reward - self.bs[i]
             else:
                 r_i = reward
             reinforce_loss.append(dy.logistic(r_i) * ll)
         ret.add_loss("Reinforce", -dy.esum(reinforce_loss) * lmbd)
     # Total Loss
     return ret
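A standalone sketch (made-up rewards) of the z-normalization over the batch dimension used above; `dy.mean_batches`/`dy.std_batches` reduce over the batch and broadcast back:

import dynet as dy

dy.renew_cg()
reward = dy.inputTensor([1.0, 2.0, 4.0], batched=True)  # batch of 3 scalar rewards
normed = dy.cdiv(reward - dy.mean_batches(reward), dy.std_batches(reward))
print(normed.npvalue())  # zero mean, unit variance across the batch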
Example No. 10
 def sample_one(
     self,
     translator: 'xnmt.models.translators.AutoRegressiveTranslator',
     initial_state: decoders.AutoRegressiveDecoderState,
     forced_trg_ids: Optional[Sequence[numbers.Integral]] = None
 ) -> SearchOutput:
     # Search variables
     current_words = None
     current_state = initial_state
     done = None
     # Outputs
     logsofts = []
     samples = []
     states = []
     attentions = []
     masks = []
     # Sample to the max length
     for length in range(self.max_len):
         translator_output = translator.generate_one_step(
             current_words, current_state)
         if forced_trg_ids is None:
             sample = translator_output.logsoftmax.tensor_value(
             ).categorical_sample_log_prob().as_numpy()
             if len(sample.shape) == 2:
                 sample = sample[0]
         else:
             sample = [
                 forced_trg[length]
                 if forced_trg.sent_len() > length else Vocab.ES
                 for forced_trg in forced_trg_ids
             ]
         logsoft = dy.pick_batch(translator_output.logsoftmax, sample)
         if done is not None:
             sample = [
                 sample[i] if not done[i] else Vocab.ES
                 for i in range(len(done))
             ]
             # masking for logsoftmax
             mask = [1 if not done[i] else 0 for i in range(len(done))]
             logsoft = dy.cmult(logsoft, dy.inputTensor(mask, batched=True))
             masks.append(mask)
         # Appending output
         logsofts.append(logsoft)
         samples.append(sample)
         states.append(translator.get_nobp_state(translator_output.state))
         attentions.append(translator_output.attention)
         # Next time step
         current_words = sample
         current_state = translator_output.state
         # Check done
         done = [x == Vocab.ES for x in sample]
         # Check if we are done.
         if all(done):
             break
     # Packing output
     scores = dy.esum(logsofts).npvalue()
     masks.insert(0, [1 for _ in range(len(done))])
     samples = np.stack(samples, axis=1)
     return SearchOutput(samples, attentions, scores, logsofts, states,
                         masks)
Example No. 11
    def calc_loss(
            self, x: dy.Expression,
            y: Union[numbers.Integral,
                     List[numbers.Integral]]) -> dy.Expression:
        if self.can_loss_be_derived_from_scores():
            scores = self.calc_scores(x)
            # single mode
            if not batchers.is_batched(y):
                loss = dy.pickneglogsoftmax(scores, y)
            # minibatch mode
            else:
                loss = dy.pickneglogsoftmax_batch(scores, y)
        else:
            log_prob = self.calc_log_probs(x)
            if not batchers.is_batched(y):
                loss = -dy.pick(log_prob, y)
            else:
                loss = -dy.pick_batch(log_prob, y)

            if self.label_smoothing > 0:
                ls_loss = -dy.mean_elems(log_prob)
                loss = ((1 - self.label_smoothing) *
                        loss) + (self.label_smoothing * ls_loss)

        return loss
Example No. 12
    def decode_loss(self, src_encodings, tgt_seqs):
        """
        :param tgt_seqs: (tgt_heads, tgt_labels): list (length=batch_size) of (src_len)
        """

        # todo(NOTE): Sentences should start with empty token (as root of dependency tree)!

        tgt_heads, tgt_labels = tgt_seqs

        src_len = len(tgt_heads[0])
        batch_size = len(tgt_heads)
        np_tgt_heads = np.array(tgt_heads).flatten()  # (src_len * batch_size)
        np_tgt_labels = np.array(tgt_labels).flatten()
        s_arc, s_label = self.cal_scores(src_encodings)  # (src_len, src_len, bs), ([(src_len, src_len, bs)])

        s_arc_value = s_arc.npvalue()
        s_arc_choice = np.argmax(s_arc_value, axis=0).transpose().flatten()  # (src_len * batch_size)

        s_pick_labels = [dy.pick_batch(dy.reshape(score, (src_len,), batch_size=src_len * batch_size), s_arc_choice)
                     for score in s_label]
        s_argmax_labels = dy.concatenate(s_pick_labels, d=0)  # n_labels, src_len * batch_size

        reshape_s_arc = dy.reshape(s_arc, (src_len,), batch_size=src_len * batch_size)
        arc_loss = dy.pickneglogsoftmax_batch(reshape_s_arc, np_tgt_heads)
        label_loss = dy.pickneglogsoftmax_batch(s_argmax_labels, np_tgt_labels)

        loss = dy.sum_batches(arc_loss + label_loss) / batch_size
        return loss
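The `dy.reshape(..., batch_size=src_len * batch_size)` trick above relies on DyNet's column-major layout: each column of the score matrix becomes one batch element, so a single `pickneglogsoftmax_batch` covers every (position, sentence) pair. A toy sketch:

import dynet as dy
import numpy as np

dy.renew_cg()
src_len, batch_size = 4, 2
# A (src_len x src_len*batch_size) arc-score matrix with random entries.
s_arc = dy.inputTensor(np.random.rand(src_len, src_len * batch_size))
# Column-major reshape: each column becomes one batch element of dim src_len.
flat = dy.reshape(s_arc, (src_len,), batch_size=src_len * batch_size)
heads = [0, 1, 2, 3, 3, 2, 1, 0]  # one gold head index per column
loss = dy.sum_batches(dy.pickneglogsoftmax_batch(flat, heads)) / batch_size
print(loss.value())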
Example No. 13
 def calc_loss(self, policy_reward, only_final_reward=True):
   loss = losses.FactoredLossExpr()
   ## Calculate baseline
   pred_reward, baseline_loss = self.calc_baseline_loss(policy_reward, only_final_reward)
   if only_final_reward:
     rewards = [policy_reward - pw_i for pw_i in pred_reward]
   else:
     rewards = [pr_i - pw_i for pr_i, pw_i in zip(policy_reward, pred_reward)]
   loss.add_loss("rl_baseline", baseline_loss)
   ## Z-Normalization
   rewards = dy.concatenate(rewards, d=0)
   if self.z_normalization:
     rewards_value = rewards.value()
     rewards_mean = np.mean(rewards_value)
     rewards_std = np.std(rewards_value) + 1e-10
     rewards = (rewards - rewards_mean) / rewards_std
   ## Calculate Confidence Penalty
   if self.confidence_penalty:
     cp_loss = self.confidence_penalty.calc_loss(self.policy_lls)
     loss.add_loss("rl_confpen", cp_loss)
   ## Calculate Reinforce Loss
   reinf_loss = []
   # Loop through all action in one sequence
   for i, (policy, action) in enumerate(zip(self.policy_lls, self.actions)):
     # Main Reinforce calculation
     reward = dy.pick(rewards, i)
     ll = dy.pick_batch(policy, action)
     if self.valid_pos is not None:
       ll = dy.pick_batch_elems(ll, self.valid_pos[i])
       reward = dy.pick_batch_elems(reward, self.valid_pos[i])
     reinf_loss.append(dy.sum_batches(ll * reward))
   loss.add_loss("rl_reinf", -self.weight * dy.esum(reinf_loss))
   ## the composed losses
   return loss
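A toy sketch of `dy.pick_batch_elems`, which the code above uses to restrict the REINFORCE terms to valid batch positions:

import dynet as dy

dy.renew_cg()
ll = dy.inputTensor([0.1, 0.2, 0.3, 0.4], batched=True)  # batch of 4 scalars
valid = dy.pick_batch_elems(ll, [0, 2])                  # keep batch items 0 and 2
print(valid.npvalue())  # [0.1 0.3]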
Example No. 14
 def cross_entropy_loss(self, scores, next_words):
     if self.label_smoothing:
         log_softmax = dy.log_softmax(scores)
         return -dy.pick_batch(log_softmax, next_words) * (1 - self.label_smoothing) \
                - dy.mean_elems(log_softmax) * self.label_smoothing
     else:
         return dy.pickneglogsoftmax_batch(scores, next_words)
Example No. 15
    def calc_loss(self, mlp_dec_state, ref_action):
        """
        Label Smoothing is implemented with reference to Section 7 of the paper
        "Rethinking the Inception Architecture for Computer Vision"
        (https://arxiv.org/pdf/1512.00567.pdf)
        """
        scores = self.get_scores(mlp_dec_state)

        if self.label_smoothing == 0.0:
            # single mode
            if not xnmt.batcher.is_batched(ref_action):
                return dy.pickneglogsoftmax(scores, ref_action)
            # minibatch mode
            else:
                return dy.pickneglogsoftmax_batch(scores, ref_action)

        else:
            log_prob = dy.log_softmax(scores)
            if not xnmt.batcher.is_batched(ref_action):
                pre_loss = -dy.pick(log_prob, ref_action)
            else:
                pre_loss = -dy.pick_batch(log_prob, ref_action)

            ls_loss = -dy.mean_elems(log_prob)
            loss = ((1 - self.label_smoothing) *
                    pre_loss) + (self.label_smoothing * ls_loss)
            return loss
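In the notation of the referenced paper, with vocabulary V and smoothing weight \epsilon, the loss computed above corresponds to

    loss = (1 - \epsilon) \cdot \big(-\log p(y)\big) + \epsilon \cdot \frac{1}{|V|} \sum_{v \in V} \big(-\log p(v)\big)

since `dy.mean_elems(log_prob)` averages the log probabilities over the vocabulary dimension.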
Example No. 16
    def calc_loss(
            self, x: dy.Expression,
            y: Union[numbers.Integral,
                     List[numbers.Integral]]) -> dy.Expression:

        scores = self.calc_scores(x)

        if self.label_smoothing == 0.0:
            # single mode
            if not batchers.is_batched(y):
                loss = dy.pickneglogsoftmax(scores, y)
            # minibatch mode
            else:
                loss = dy.pickneglogsoftmax_batch(scores, y)
        else:
            log_prob = dy.log_softmax(scores)
            if not batchers.is_batched(y):
                pre_loss = -dy.pick(log_prob, y)
            else:
                pre_loss = -dy.pick_batch(log_prob, y)

            ls_loss = -dy.mean_elems(log_prob)
            loss = ((1 - self.label_smoothing) *
                    pre_loss) + (self.label_smoothing * ls_loss)

        return loss
Example No. 17
 def embed(self, x):
   if self.train and self.word_dropout > 0.0 and self.word_id_mask is None:
     batch_size = x.batch_size() if xnmt.batcher.is_batched(x) else 1
     self.word_id_mask = [set(np.random.choice(self.vocab_size, int(self.vocab_size * self.word_dropout), replace=False)) for _ in range(batch_size)]
   emb_e = dy.parameter(self.embeddings)
   # single mode
   if not xnmt.batcher.is_batched(x):
     if self.train and self.word_id_mask and x in self.word_id_mask[0]:
       ret = dy.zeros((self.emb_dim,))
     else:
       ret = dy.pick(emb_e, index=x)
       if self.fix_norm is not None:
         ret = dy.cdiv(ret, dy.l2_norm(ret))
         if self.fix_norm != 1:
           ret *= self.fix_norm
   # minibatch mode
   else:
     ret = dy.pick_batch(emb_e, x)
     if self.fix_norm is not None:
       ret = dy.cdiv(ret, dy.l2_norm(ret))
       if self.fix_norm != 1:
         ret *= self.fix_norm
     if self.train and self.word_id_mask and any(x[i] in self.word_id_mask[i] for i in range(x.batch_size())):
       dropout_mask = dy.inputTensor(np.transpose([[0.0]*self.emb_dim if x[i] in self.word_id_mask[i] else [1.0]*self.emb_dim for i in range(x.batch_size())]), batched=True)
       ret = dy.cmult(ret, dropout_mask)
   if self.train and self.weight_noise > 0.0:
     ret = dy.noise(ret, self.weight_noise)
   return ret
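A toy sketch of the minibatch branch above: with a dense (vocab x dim) embedding matrix, `dy.pick_batch` selects one row per batch element (toy sizes assumed):

import dynet as dy

dy.renew_cg()
pc = dy.ParameterCollection()
emb_e = dy.parameter(pc.add_parameters((3, 4)))  # vocab of 3, embedding dim 4
batch = dy.pick_batch(emb_e, [0, 2])             # rows 0 and 2 as a batch of 2
print(batch.dim())                               # ((4,), 2)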
Example No. 19
  def __call__(self, translator, dec_state, src, trg):
    # TODO: apply trg.mask ?
    samples = []
    logsofts = []
    self.bs = []
    done = [False for _ in range(len(trg))]
    for _ in range(self.sample_length):
      dec_state.context = translator.attender.calc_context(dec_state.rnn_state.output())
      if self.use_baseline:
        h_t = dy.tanh(translator.decoder.context_projector(dy.concatenate([dec_state.rnn_state.output(), dec_state.context])))
        self.bs.append(self.baseline(dy.nobackprop(h_t)))
      logsoft = dy.log_softmax(translator.decoder.get_scores(dec_state))
      sample = logsoft.tensor_value().categorical_sample_log_prob().as_numpy()[0]
      # Keep track of previously sampled EOS
      sample = [sample_i if not done_i else Vocab.ES for sample_i, done_i in zip(sample, done)]
      # Appending and feeding in the decoder
      logsoft = dy.pick_batch(logsoft, sample)
      logsofts.append(logsoft)
      samples.append(sample)
      dec_state = translator.decoder.add_input(dec_state, translator.trg_embedder.embed(xnmt.batcher.mark_as_batch(sample)))
      # Check if we are done.
      done = list(six.moves.map(lambda x: x == Vocab.ES, sample))
      if all(done):
        break

    samples = np.stack(samples, axis=1).tolist()
    self.eval_score = []
    for trg_i, sample_i in zip(trg, samples):
      # Removing EOS
      try:
        idx = sample_i.index(Vocab.ES)
        sample_i = sample_i[:idx]
      except ValueError:
        pass
      try:
        idx = trg_i.words.index(Vocab.ES)
        trg_i.words = trg_i.words[:idx]
      except ValueError:
        pass
      # Calculate the evaluation score
      score = 0 if not len(sample_i) else self.evaluation_metric.evaluate_fast(trg_i.words, sample_i)
      self.eval_score.append(score)
    self.true_score = dy.inputTensor(self.eval_score, batched=True)
    loss = LossBuilder()

    if self.use_baseline:
      for i, (score, _) in enumerate(zip(self.bs, logsofts)):
        logsofts[i] = dy.cmult(logsofts[i], score - self.true_score)
      loss.add_loss("Reinforce", dy.sum_elems(dy.esum(logsofts)))

    else:
      loss.add_loss("Reinforce", dy.sum_elems(dy.cmult(-self.true_score, dy.esum(logsofts))))

    if self.use_baseline:
      baseline_loss = []
      for bs in self.bs:
        baseline_loss.append(dy.squared_distance(self.true_score, bs))
      loss.add_loss("Baseline", dy.sum_elems(dy.esum(baseline_loss)))
    return loss
Example No. 20
 def score_one_sequence(self, tag_scores, tags, batch_size):
     ''' tags: list of tag ids at each time step '''
     # print tags, batch_size
     # print batch_size
     # print "scoring one sentence"
     tags = [[self.start_id] * batch_size] + tags  # len(tag_scores) = len(tags) - 1
     score = dy.inputTensor(np.zeros(batch_size), batched=True)
     # tag_scores = dy.concatenate_cols(tag_scores) # tot_tags, sent_len, batch_size
     # print "tag dim: ", tag_scores.dim()
     for i in range(len(tags) - 1):
         score += dy.pick_batch(dy.lookup_batch(self.transition_matrix, tags[i + 1]), tags[i]) \
                  + dy.pick_batch(tag_scores[i], tags[i + 1])
     score += dy.pick_batch(
         dy.lookup_batch(self.transition_matrix,
                         [self.end_id] * batch_size), tags[-1])
     return score
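A toy sketch of the CRF transition scoring above: `dy.lookup_batch` fetches one transition row per "next" tag, and `dy.pick_batch` then indexes each row by the "previous" tag (toy tag set assumed):

import dynet as dy

dy.renew_cg()
pc = dy.ParameterCollection()
trans = pc.add_lookup_parameters((5, 5))  # transition_matrix[next][prev]
rows = dy.lookup_batch(trans, [2, 4])     # rows for next tags, batch of 2
score = dy.pick_batch(rows, [1, 3])       # transition scores T[2][1], T[4][3]
print(score.npvalue())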
Example No. 21
    def predict_logprobs(self,X,Y,structural=True,hidden_out=False):
        """
        Returns the log probabilities of the predictions for this model (batched version).

        @param X: the input indexes from which to predict (each xdatum is expected to be an iterable of integers)
        @param Y: a list of reference indexes for which to extract the prob
        @param structural: switches between structural and lexical logprob evaluation
        @param hidden_out: outputs an additional list of hidden-dimension vectors
        @return the list of predicted log probabilities for each of the provided ref y in Y
        """
        assert(len(X) == len(Y))
        assert(all(len(x) == self.input_length for x in X))

        if structural:
            dy.renew_cg()
            W = dy.parameter(self.hidden_weights)
            E = dy.parameter(self.input_embeddings)
            A = dy.parameter(self.action_weights)
            
            batched_X  = zip(*X) #transposes the X matrix
            embeddings = [dy.pick_batch(E, xcolumn) for xcolumn in batched_X]
            xdense     = dy.concatenate(embeddings)
            preds      = dy.pickneglogsoftmax_batch(A * dy.tanh( W * xdense ),Y).value()
            return [-ypred  for ypred in preds]

        else:#lexical
            if self.tied:
                dy.renew_cg()
                W = dy.parameter(self.hidden_weights)
                E = dy.parameter(self.input_embeddings)
                batched_X  = zip(*X) #transposes the X matrix
                embeddings = [dy.pick_batch(E, xcolumn) for xcolumn in batched_X]
                xdense     = dy.concatenate(embeddings)
                preds      = dy.pickneglogsoftmax_batch(E * dy.tanh( W * xdense ),Y).value()
                return [-ypred  for ypred in preds]
            else:
                dy.renew_cg()
                O = dy.parameter(self.output_embeddings)
                W = dy.parameter(self.hidden_weights)
                E = dy.parameter(self.input_embeddings)
                batched_X  = zip(*X) #transposes the X matrix
                embeddings = [dy.pick_batch(E, xcolumn) for xcolumn in batched_X]
                xdense     = dy.concatenate(embeddings)
                preds      = dy.pickneglogsoftmax_batch(O * dy.tanh( W * xdense ),Y).value()
                return [-ypred  for ypred in preds]
Example No. 22
 def _embed_word(self, word, is_batched):
     if is_batched:
         embedding = dy.pick_batch(
             self.embeddings,
             word) if self.is_dense else self.embeddings.batch(word)
     else:
         embedding = dy.pick(
             self.embeddings,
             index=word) if self.is_dense else self.embeddings[word]
     return embedding
Example No. 23
  def on_calc_additional_loss(self, translator_loss):
    if not self.learn_segmentation or self.segment_decisions is None:
      return None
    reward = -translator_loss["mle"]
    if not self.log_reward:
      reward = dy.exp(reward)
    reward = dy.nobackprop(reward)

    # Make sure that reward is not a scalar, but rather one value per batch item
    assert reward.dim()[1] == len(self.src_sent)
    # Mask
    enc_mask = self.enc_mask.get_active_one_mask().transpose() if self.enc_mask is not None else None
    # Compose the loss
    ret = LossBuilder()
    ## Length prior
    alpha = self.length_prior_alpha.value() if self.length_prior_alpha is not None else 0
    if alpha > 0:
      reward += self.segment_length_prior * alpha
    # reward z-score normalization
    if self.z_normalization:
      reward = dy.cdiv(reward-dy.mean_batches(reward), dy.std_batches(reward) + EPS)
    ## Baseline Loss
    if self.use_baseline:
      baseline_loss = []
      for i, baseline in enumerate(self.bs):
        loss = dy.squared_distance(reward, baseline)
        if enc_mask is not None:
          loss = dy.cmult(dy.inputTensor(enc_mask[i], batched=True), loss)
        baseline_loss.append(loss)

      ret.add_loss("Baseline", dy.esum(baseline_loss))

    if self.print_sample:
      print(dy.exp(self.segment_logsoftmaxes[i]).npvalue().transpose()[0])
    ## Reinforce Loss
    lmbd = self.lmbd.value()
    if lmbd > 0.0:
      reinforce_loss = []
      # Calculating the loss of the baseline and reinforce
      for i in range(len(self.segment_decisions)):
        ll = dy.pick_batch(self.segment_logsoftmaxes[i], self.segment_decisions[i])
        if self.use_baseline:
          r_i = reward - dy.nobackprop(self.bs[i])
        else:
          r_i = reward
        if enc_mask is not None:
          ll = dy.cmult(dy.inputTensor(enc_mask[i], batched=True), ll)
        reinforce_loss.append(r_i * -ll)
      loss = dy.esum(reinforce_loss) * lmbd
      ret.add_loss("Reinforce", loss)
    if self.confidence_penalty:
      ls_loss = self.confidence_penalty(self.segment_logsoftmaxes, enc_mask)
      ret.add_loss("Confidence Penalty", ls_loss)
    # Total Loss
    return ret
Example No. 24
 def next(self, w, c, test=True, state=None):
     e = dy.pick_batch(self.E, w)
     if not test:
         e = dy.dropout_dim(e, 0, self.wdr)
     # Run LSTM
     if state is None:
         self.ds = self.ds.add_input(e)
         next_state = self.ds
     else:
         next_state = state.add_input(e)
     h = next_state.output()
     return h, e, next_state
Example No. 25
 def next(self, word_idx, context, train, cur_state=None):
     embs = dy.pick_batch(self.E, word_idx)
     if train:
         embs = dy.dropout_dim(embs, 0, self.word_dropout)
     x = dy.concatenate([embs, context])
     if cur_state is None:
         self.dec_state = self.dec_state.add_input(x)
         next_state = self.dec_state
     else:
         next_state = cur_state.add_input(x)
     hidden = next_state.output()
     return hidden, embs, next_state
Example No. 26
    def learn(self, batch_size):
        if self.prioritized:
            if not self.memory.is_full(): return -np.inf
            indices, exps, weights = self.memory.sample(batch_size, self.beta)
        else:
            exps = self.memory.sample(batch_size)
        obss, actions, rewards, obs_nexts, dones = self._process(exps)

        dy.renew_cg()
        target_network = self.target_network if self.use_double_dqn else self.network
        if self.dueling:
            target_values, v = target_network(obs_nexts, batched=True)
            target_values = target_values.npvalue() + v.npvalue()
        else:
            target_values = target_network(obs_nexts, batched=True)
            target_values = target_values.npvalue()
        target_values = np.max(target_values, axis=0)
        target_values = rewards + self.reward_decay * (target_values *
                                                       (1 - dones))

        dy.renew_cg()
        if self.dueling:
            all_values_expr, v = self.network(obss, batched=True)
        else:
            all_values_expr = self.network(obss, batched=True)
        picked_values = dy.pick_batch(all_values_expr, actions)
        diff = (picked_values + v if self.dueling else
                picked_values) - dy.inputTensor(target_values, batched=True)
        if self.prioritized:
            self.memory.update(indices, np.transpose(np.abs(diff.npvalue())))
        losses = dy.pow(diff, dy.constant(1, 2))
        if self.prioritized:
            losses = dy.cmult(losses, dy.inputTensor(weights, batched=True))
        loss = dy.sum_batches(losses)
        loss_value = loss.npvalue()
        loss.backward()
        self.trainer.update()

        self.epsilon = max(self.epsilon - self.epsilon_decrease,
                           self.epsilon_lower)
        if self.prioritized:
            self.beta = min(self.beta + self.beta_increase, 1.)

        self.learn_step += 1
        if self.use_double_dqn and self.learn_step % self.n_replace_target == 0:
            self.target_network.update(self.network)
        return loss_value
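A toy sketch of the Q-value selection above: given a batch of per-state action-value vectors, `dy.pick_batch` extracts Q(s, a) for each chosen action:

import dynet as dy

dy.renew_cg()
# Q-values for 3 actions over a batch of 2 states (last dim is the batch).
q = dy.inputTensor([[1.0, 0.1], [3.0, 2.0], [0.5, 4.0]], batched=True)
actions = [1, 2]                  # action chosen in each state
q_sa = dy.pick_batch(q, actions)  # -> [3.0, 4.0]
print(q_sa.npvalue())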
Example No. 27
    def next(self, w, c, test=True, state=None):
        if isinstance(w, dy.Expression):
            e = w
        else:
            e = dy.pick_batch(self.E, w)

        if not test:
            e = dy.dropout_dim(e, 0, self.wdr)
        x = dy.concatenate([e, c])
        # Run LSTM
        if state is None:
            self.ds = self.ds.add_input(x)
            next_state = self.ds
        else:
            next_state = state.add_input(x)
        h = next_state.output()
        return h, e, next_state
Example No. 28
    def embed_sentence(self, ws, pwords, ts, chars, is_train):
        cembed = [dy.lookup_batch(self.clookup, c) for c in chars]
        char_fwd, char_bckd = self.char_lstm.builder_layers[0][0].initial_state().transduce(cembed)[-1], \
                              self.char_lstm.builder_layers[0][1].initial_state().transduce(reversed(cembed))[-1]
        crnn = dy.reshape(dy.concatenate_cols([char_fwd, char_bckd]), (self.options.we, ws.shape[0] * ws.shape[1]))
        cnn_reps = [list() for _ in range(len(ws))]
        for i in range(ws.shape[0]):
            cnn_reps[i] = dy.pick_batch(crnn, [i * ws.shape[1] + j for j in range(ws.shape[1])], 1)

        wembed = [dy.lookup_batch(self.wlookup, ws[i]) + dy.lookup_batch(self.elookup, pwords[i]) + cnn_reps[i] for i in range(len(ws))]
        posembed = [dy.lookup_batch(self.tlookup, ts[i]) for i in range(len(ts))]
        if (not is_train) or self.options.dropout == 0:
            return [dy.concatenate([wembed[i], posembed[i]]) for i in range(len(ts))]
        else:
            emb_masks = self.generate_emb_mask(ws.shape[0], ws.shape[1])
            return [dy.concatenate([dy.cmult(w, wm), dy.cmult(pos, posm)]) for w, pos, (wm, posm) in
                      zip(wembed, posembed, emb_masks)]
Example No. 29
    def process_batch(self, batch, training=False):
        self.TRAINING_ITER = training
        self.DROPOUT = self.args.dropout if (
            self.TRAINING_ITER and self.args.dropout > 0) else None
        self.BATCH_SIZE = len(batch)

        sents, masks = self.vocab.batchify(batch)

        self.instantiate_parameters()
        init_state = self.rnn.initial_state(mb_size=self.BATCH_SIZE,
                                            dropout=self.DROPOUT)

        # embeddings = [dynet.reshape(dynet.select_cols(self.vocab_lookup, toks), (self.args.dim,), self.BATCH_SIZE)
        # embeddings = [dynet.reshape(dynet.transpose(dynet.select_rows(self.vocab_R, toks)), (self.args.dim*2,), self.BATCH_SIZE)
        embeddings = [dynet.pick_batch(self.vocab_R, toks) for toks in sents]
        outputs = init_state.transduce(embeddings)
        outputs = [
            dynet.to_device(out, self.args.param_device) for out in outputs
        ]
        if self.DROPOUT:
            y_ts = [dynet.cmult(y_t, self.dropout_mask_y_t) for y_t in outputs]
        else:
            y_ts = outputs

        r_ts = [
            dynet.affine_transform([
                self.vocab_bias, self.vocab_R,
                dynet.tanh(dynet.affine_transform([self.bias, self.R, y_t]))
            ]) for y_t in y_ts
        ]
        errs = [
            dynet.pickneglogsoftmax_batch(r_t, toks)
            for r_t, toks in zip(r_ts, sents[1:])
        ]

        for tok_i, (err, mask) in enumerate(zip(errs, masks[1:])):
            if min(mask) == 0:
                errs[tok_i] = err * dynet.inputTensor(
                    mask, batched=True, device=self.args.param_device)

        err = dynet.esum(errs)
        char_count = [1 + len(self.vocab.pp(sent[1:-1])) for sent in batch]
        word_count = [len(sent[1:]) for sent in batch]
        # word_count = [2+self.vocab.pp(sent[1:-1]).count(' ') for sent in batch]
        return {"loss": err, "charcount": char_count, "wordcount": word_count}
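A toy sketch of the loss masking above: multiplying a batched scalar loss by a 0/1 mask (also batched) zeroes out finished or padded positions before summing:

import dynet as dy

dy.renew_cg()
err = dy.inputTensor([0.7, 1.2, 0.3], batched=True)  # per-sentence loss, batch of 3
mask = [1.0, 0.0, 1.0]                               # middle sentence is padding here
masked = err * dy.inputTensor(mask, batched=True)
print(dy.sum_batches(masked).value())                # 0.7 + 0.3 = 1.0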
Example No. 30
    def compute_loss(self, words, extwords, tags, true_arcs, true_labels):
        arc_logits, rel_logits = self.forward(words, extwords, tags, True)
        seq_len = len(true_arcs)
        targets_1D = dynet_flatten_numpy(true_arcs)
        flat_arc_logits = dy.reshape(arc_logits, (seq_len, ), seq_len)
        losses = dy.pickneglogsoftmax_batch(flat_arc_logits, targets_1D)
        arc_loss = dy.sum_batches(losses)

        flat_rel_logits = dy.reshape(rel_logits, (seq_len, self.rel_size),
                                     seq_len)
        partial_rel_logits = dy.pick_batch(flat_rel_logits, targets_1D)
        targets_rel1D = dynet_flatten_numpy(true_labels)
        losses = dy.pickneglogsoftmax_batch(partial_rel_logits, targets_rel1D)
        rel_loss = dy.sum_batches(losses)

        loss = arc_loss + rel_loss

        return loss
Example No. 31
    def decode_loss(self, src_encodings, masks, tgt_seqs, sents_len):
        """
        :param tgt_seqs: (tgt_heads, tgt_labels): list (length=batch_size) of (src_len)
        """

        tgt_heads, tgt_labels = tgt_seqs

        src_len = len(tgt_heads[0])
        batch_size = len(tgt_heads)
        np_tgt_heads = np.array(tgt_heads).flatten()  # (src_len * batch_size)
        np_tgt_labels = np.array(tgt_labels).flatten()
        np_masks = np.array(masks).transpose().flatten()
        masks_expr = dy.inputVector(np_masks)
        masks_expr = dy.reshape(masks_expr, (1, ),
                                batch_size=src_len * batch_size)

        s_arc, s_label = self.cal_scores(
            src_encodings
        )  # (src_len, src_len, bs), ([(src_len, src_len, bs)])
        s_arc = dy.select_cols(s_arc, range(1, src_len + 1))
        s_label = [
            dy.select_cols(label, range(1, src_len + 1)) for label in s_label
        ]
        s_arc_value = s_arc.npvalue()
        s_arc_choice = np.argmax(
            s_arc_value,
            axis=0).transpose().flatten()  # (src_len * batch_size)
        s_pick_labels = [
            dy.pick_batch(
                dy.reshape(score, (src_len + 1, ),
                           batch_size=src_len * batch_size), s_arc_choice)
            for score in s_label
        ]
        s_argmax_labels = dy.concatenate(s_pick_labels,
                                         d=0)  # n_labels, src_len * batch_size

        reshape_s_arc = dy.reshape(s_arc, (src_len + 1, ),
                                   batch_size=src_len * batch_size)
        arc_loss = dy.pickneglogsoftmax_batch(reshape_s_arc, np_tgt_heads)
        arc_loss = arc_loss * masks_expr
        label_loss = dy.pickneglogsoftmax_batch(s_argmax_labels, np_tgt_labels)
        label_loss = label_loss * masks_expr
        loss = dy.sum_batches(arc_loss + label_loss)
        return loss
Example No. 33
# Standalone setup (toy sizes assumed) so the tutorial snippet below runs:
import dynet as dy
import numpy as np

m = dy.ParameterCollection()
lp = m.add_lookup_parameters((10, 10))  # 10 entries of dimension 10

# regular lookup
a = lp[1].npvalue()
b = lp[2].npvalue()
c = lp[3].npvalue()

# batch lookup instead of single elements.
# two ways of doing this.
abc1 = dy.lookup_batch(lp, [1,2,3])
print(abc1.npvalue())

abc2 = lp.batch([1,2,3])
print(abc2.npvalue())

print(np.hstack([a,b,c]))


# use pick and pickneglogsoftmax in batch mode
# (must be used in conjunction with lookup_batch):
print("\nPick")
W = dy.parameter( m.add_parameters((5, 10)) )
h = W * lp.batch([1,2,3])
print(h.npvalue())
print(dy.pick_batch(h,[1,2,3]).npvalue())
print(dy.pick(W*lp[1],1).value(), dy.pick(W*lp[2],2).value(), dy.pick(W*lp[3],3).value())

# using pickneglogsoftmax_batch
print("\nPick neg log softmax")
print((-dy.log(dy.softmax(h))).npvalue())
print(dy.pickneglogsoftmax_batch(h,[1,2,3]).npvalue())