Пример #1
0
 def embed(self, x: Union[batchers.Batch, numbers.Integral]) -> dy.Expression:
   """
   Embed a single word id or a batch of word ids.

   During training, word dropout replaces the embedding of dropped ids by
   zeros; the set of dropped ids is drawn lazily the first time it is needed
   and cached in ``self.word_id_mask`` (one set per batch element).

   Args:
     x: a word id, or a batch of word ids.
   Returns:
     The (optionally norm-fixed, dropped-out and noised) embedding expression.
   """
   is_batch = batchers.is_batched(x)
   training = self.train
   if training and self.word_dropout > 0.0 and self.word_id_mask is None:
     num_masks = x.batch_size() if is_batch else 1
     num_dropped = int(self.vocab_size * self.word_dropout)
     self.word_id_mask = [set(np.random.choice(self.vocab_size, num_dropped, replace=False))
                          for _ in range(num_masks)]
   emb_e = dy.parameter(self.embeddings)

   def apply_fix_norm(vec):
     # Rescale to unit L2 norm, then to the requested fixed norm (if any).
     if self.fix_norm is None:
       return vec
     unit = dy.cdiv(vec, dy.l2_norm(vec))
     return unit if self.fix_norm == 1 else unit * self.fix_norm

   if is_batch:
     ret = apply_fix_norm(dy.pick_batch(emb_e, x))
     if training and self.word_id_mask:
       dropped = [x[i] in self.word_id_mask[i] for i in range(x.batch_size())]
       if any(dropped):
         # One column per batch element: all zeros for dropped words, all ones otherwise.
         columns = [[0.0]*self.emb_dim if is_dropped else [1.0]*self.emb_dim for is_dropped in dropped]
         dropout_mask = dy.inputTensor(np.transpose(columns), batched=True)
         ret = dy.cmult(ret, dropout_mask)
   elif training and self.word_id_mask and x in self.word_id_mask[0]:
     # Dropped word in single mode: zero embedding, no norm fixing.
     ret = dy.zeros((self.emb_dim,))
   else:
     ret = apply_fix_norm(dy.pick(emb_e, index=x))

   if training and self.weight_noise > 0.0:
     ret = dy.noise(ret, self.weight_noise)
   return ret
 def get_normalized_reps(self, embs, forward_lstm, backward_lstm, encode=False):
     """
     Encode each embedding sequence with a forward and a backward LSTM and
     return the L2-normalised concatenation of their final outputs.

     :param embs: iterable of embedding sequences (one per word).
     :param forward_lstm: builder run over each sequence in order.
     :param backward_lstm: builder run over each sequence reversed.
     :param encode: when true, return evaluated values (``.value()``) instead
         of expressions.
     :return: list with one normalised representation per entry of ``embs``.
     """
     word_reps = []
     for emb in embs:
         fwd_last = forward_lstm.initial_state().transduce(emb)[-1]
         bwd_last = backward_lstm.initial_state().transduce(reversed(emb))[-1]
         word_reps.append(dy.concatenate([fwd_last, bwd_last]))

     normalized = [dy.cdiv(rep, dy.l2_norm(rep)) for rep in word_reps]
     if encode:
         return [rep.value() for rep in normalized]
     return normalized
Пример #3
0
 def embed(self, x):
   """
   Embed a single word id or a batch of word ids using the lookup parameters.

   In training, ids contained in the lazily sampled ``self.word_id_mask`` are
   embedded as zeros (word dropout). ``fix_norm`` rescales embeddings to a
   fixed L2 norm and ``weight_noise`` adds noise while training.

   :param x: a word id, or a batch of word ids.
   :return: the embedding expression.
   """
   batched = xnmt.batcher.is_batched(x)
   if self.train and self.word_dropout > 0.0 and self.word_id_mask is None:
     num_masks = x.batch_size() if batched else 1
     drop_count = int(self.vocab_size * self.word_dropout)
     self.word_id_mask = [set(np.random.choice(self.vocab_size, drop_count, replace=False))
                          for _ in range(num_masks)]

   use_mask = self.train and self.word_id_mask
   if not batched and use_mask and x in self.word_id_mask[0]:
     # single mode, dropped word
     ret = dy.zeros((self.emb_dim,))
   else:
     ret = self.embeddings.batch(x) if batched else self.embeddings[x]
     if self.fix_norm is not None:
       ret = dy.cdiv(ret, dy.l2_norm(ret))
       if self.fix_norm != 1:
         ret *= self.fix_norm
     if batched and use_mask:
       dropped = [x[i] in self.word_id_mask[i] for i in range(x.batch_size())]
       if any(dropped):
         # zero column for every dropped batch element, ones elsewhere
         columns = [[0.0]*self.emb_dim if flag else [1.0]*self.emb_dim for flag in dropped]
         ret = dy.cmult(ret, dy.inputTensor(np.transpose(columns), batched=True))

   if self.train and self.weight_noise > 0.0:
     ret = dy.noise(ret, self.weight_noise)
   return ret
Пример #4
0
 def __cosine_loss(self, pred, gold):
     """
     Cosine-distance loss between two vectors.

     Computes ``(1 - cos(pred, gold)) / 2`` where the cosine is the dot
     product divided by the product of the L2 norms.

     :param pred: predicted vector expression.
     :param gold: target vector expression.
     :return: loss expression.
     """
     norm_product = dy.cmult(dy.l2_norm(pred), dy.l2_norm(gold))
     cosine = dy.cdiv(dy.dot_product(pred, gold), norm_product)
     halving = dy.scalarInput(2)
     return dy.cdiv(1 - cosine, halving)
Пример #5
0
 def on_calc_additional_loss(self, trg, generator, generator_loss):
     """
     Turn the generator's per-sample losses into rewards and hand them to the
     policy-learning component.

     For every sample the reward is the sum of the negated no-backprop loss
     factors (the ``'mle'`` factor is additionally divided by the unpadded
     target lengths) plus, if a length prior is configured, its
     log-likelihood for that sample.

     :param trg: target sentences; each must provide ``len_unpadded()``.
     :param generator: object exposing a ``losses`` attribute.
     :param generator_loss: not used by this method.
     :return: ``None`` when no policy learning is configured, otherwise the
         result of ``self.policy_learning.calc_loss``.
     """
     assert hasattr(
         generator,
         "losses"), "Must support multi sample encoder from generator."
     if self.policy_learning is None:
         return None

     trg_counts = dy.inputTensor([t.len_unpadded() for t in trg],
                                 batched=True)

     def sample_reward(sample_idx, sample_loss):
         # Collect all reward factors of one sample and sum them up.
         reward = FactoredLossExpr()
         for name, value in sample_loss.get_nobackprop_loss().items():
             if name == 'mle':
                 reward.add_loss('mle', dy.cdiv(-value, trg_counts))
             else:
                 reward.add_loss(name, -value)
         if self.length_prior is not None:
             prior_ll = self.length_prior.log_ll(self.seg_size_unpadded[sample_idx])
             reward.add_loss('seg_lp', prior_ll)
         return dy.esum(list(reward.expr_factors.values()))

     # zip() stops at the shorter of losses / composed outputs.
     paired = zip(generator.losses, self.compose_output)
     rewards = [sample_reward(i, loss) for i, (loss, _) in enumerate(paired)]
     return self.policy_learning.calc_loss(rewards)
def softmax(x):
    """
    Compute a numerically stable softmax with DyNet operations.

    The maximum along dimension 1 is subtracted before exponentiation, and
    each exponentiated entry is divided by the sum over columns of its row.

    Args:
        x:   DyNet matrix expression, presumably of shape
             (n_samples, n_features) with one feature vector per row.
    Returns:
        out: expression with the same shape as ``x``.
    """
    row_max = dy.max_dim(x, 1)
    shifted_exp = dy.exp(dy.colwise_add(x, -row_max))

    # Broadcast the per-row totals to the full matrix shape by adding them
    # column-wise onto a zero matrix.
    row_totals = dy.sum_cols(shifted_exp)
    denominator = dy.colwise_add(dy.zeroes(x.dim()[0]), row_totals)

    return dy.cdiv(shifted_exp, denominator)
def test_item(model, sentence):
    """
    Score one sentence with the attention classifier.

    The sentence is embedded, encoded, pooled (max and average) and attended
    over; the logistic output is stored in ``sentence.prediction_result`` and
    returned. The computation graph is renewed after the prediction.

    :param model: model exposing ``wlookup``, ``w2i``, ``sentence_rnn`` and
        the attention / MLP parameters used below.
    :param sentence: object with ``preprocessed_sentence``; receives
        ``prediction_result``.
    :return: the predicted score, or 0 when the sentence or its encoding is
        empty.
    """
    seq = [model.wlookup[int(model.w2i.get(token, 0))]
           for token in sentence.preprocessed_sentence]
    if not seq:
        return 0

    encoded_sequence = encode_sequence(model, seq, model.sentence_rnn)
    pooled_max = max_pooling(encoded_sequence)
    pooled_avg = average_pooling(encoded_sequence)
    if len(encoded_sequence) == 0:
        return 0

    # Unnormalised attention weight per position: exp(<W e + b, context>).
    projected = [(model.attention_w * e) + model.attention_b
                 for e in encoded_sequence]
    exp_scores = [dy.exp(dy.sum_elems(dy.cmult(p, model.att_context)))
                  for p in projected]
    normaliser = dy.esum(exp_scores)
    probs = [dy.cdiv(score, normaliser) for score in exp_scores]

    att_context = dy.esum(
        [dy.cmult(p, h) for p, h in zip(probs, encoded_sequence)])
    context = dy.concatenate([att_context, pooled_max, pooled_avg])
    y_pred = dy.logistic((model.mlp_w * context) + model.mlp_b)

    sentence.prediction_result = y_pred.scalar_value()
    dy.renew_cg()
    return sentence.prediction_result
Пример #8
0
  def transform(self, input_expr: dy.Expression, mask: Optional[batchers.Mask]=None):
    """
    Apply batch norm.

    In training mode the batch statistics are computed from the input (masked
    positions are first replaced by the mean of the unmasked ones) and folded
    into the running population statistics. Otherwise the stored population
    statistics are used.

    Args:
      input_expr: input
      mask: compute statistics only over unmasked parts of the input expression
    Returns:
      ``gamma * (input - mean) / (std + BN_EPS) + beta``, with the same
      dimensions as ``input_expr``.
    """
    dim_in = input_expr.dim()
    param_bn_gamma = dy.parameter(self.gamma)
    param_bn_beta = dy.parameter(self.beta)
    if self.train:
      num_unmasked = 0
      if mask is not None:
        input_expr = set_masked_to_mean(mask, input_expr, self.time_first)
        num_unmasked = (mask.np_arr.size - np.count_nonzero(mask.np_arr)) * broadcast_factor(mask, input_expr)
      bn_mean = dy.moment_dim(input_expr, self.get_stat_dimensions(), 1, True, num_unmasked)
      # The mean must be negated exactly once so that the numerator below is
      # (input - mean), matching the inference branch. The previous
      # `-dy.reshape(-bn_mean, ...)` negated twice and therefore added the mean.
      neg_bn_mean_reshaped = -dy.reshape(bn_mean, self.get_normalizer_dimensionality())
      # running <- (1 - momentum) * running + momentum * batch statistic
      self.population_running_mean += (-BN_MOMENTUM) * self.population_running_mean + BN_MOMENTUM * bn_mean.npvalue()
      bn_std = dy.std_dim(input_expr, self.get_stat_dimensions(), True, num_unmasked)
      self.population_running_std += (-BN_MOMENTUM) * self.population_running_std + BN_MOMENTUM * bn_std.npvalue()
    else:
      neg_bn_mean_reshaped = -dy.reshape(dy.inputVector(self.population_running_mean), self.get_normalizer_dimensionality())
      bn_std = dy.inputVector(self.population_running_std)
    bn_numerator = input_expr + neg_bn_mean_reshaped
    bn_xhat = dy.cdiv(bn_numerator, dy.reshape(bn_std, self.get_normalizer_dimensionality()) + BN_EPS)
    bn_y = dy.cmult(param_bn_gamma, bn_xhat) + param_bn_beta # y = gamma * xhat + beta
    dim_out = bn_y.dim()
    # Persist the running statistics with the model's saved arguments.
    self.save_processed_arg("population_running_mean", self.population_running_mean)
    self.save_processed_arg("population_running_std", self.population_running_std)
    assert dim_out == dim_in
    return bn_y
Пример #9
0
def set_masked_to_mean(mask, tensor_expr, time_first=False):
    """
    Set masked parts of the tensor expr to the mean of the unmasked parts.

    Args:
      mask: object exposing ``np_arr``; nonzero entries are treated as masked
        (the input is returned unchanged when there are none).
      tensor_expr: DyNet expression to patch.
      time_first: forwarded to ``mask_reshape_size`` (presumably selects the
        axis layout of ``tensor_expr`` -- confirm against that helper).
    Returns:
      An expression with the same dimensions as ``tensor_expr``.
    """
    # Nothing is masked: no work to do.
    if np.count_nonzero(mask.np_arr) == 0:
        return tensor_expr
    else:
        dim_before = tensor_expr.dim()
        reshape_size = mask_reshape_size(mask, tensor_expr.dim(), time_first)
        # (1 - mask) zeroes out the masked positions of the input.
        inv_mask_expr = dy.inputTensor(
            1.0 - np.reshape(mask.np_arr.transpose(), reshape_size),
            batched=True)
        unmasked = dy.cmult(tensor_expr, inv_mask_expr)
        # Sum all unmasked entries by repeatedly reducing dimensions.
        unmasked_mean = unmasked
        while sum(
                unmasked_mean.dim()[0]
        ) > 1:  # loop because mean_dim only supports reducing up to 2 dimensions at a time
            unmasked_mean = dy.mean_dim(
                unmasked_mean,
                list(range(min(2, len(unmasked_mean.dim()[0])))),
                unmasked_mean.dim()[1] > 1,
                n=1)  # this is mean without normalization == sum
        # Divide the sum by the number of unmasked entries (scaled by the
        # broadcast factor) to obtain the mean.
        unmasked_mean = dy.cdiv(
            unmasked_mean,
            dy.inputTensor(np.asarray([
                (mask.np_arr.size - np.count_nonzero(mask.np_arr)) *
                broadcast_factor(mask, tensor_expr)
            ]),
                           batched=False))
        # Place the mean at the masked positions only.
        mask_expr = dy.cmult(
            dy.inputTensor(np.reshape(mask.np_arr.transpose(), reshape_size),
                           batched=True), unmasked_mean)
        ret = unmasked + mask_expr
        assert ret.dim() == dim_before
        return ret
Пример #10
0
 def l2_normalize(vector):
     """
     Divide ``vector`` by its L2 norm.

     The squared norm is clamped from below by machine epsilon before the
     square root so that a zero vector does not cause a division by zero.

     :param vector: DyNet expression to normalise.
     :return: the normalised expression.
     """
     squared_norm = dy.sum_elems(dy.square(vector))
     floor = np.finfo(float).eps * dy.ones((1))[0]
     norm = dy.sqrt(dy.bmax(squared_norm, floor))
     return dy.cdiv(vector, norm)
Пример #11
0
    def dycosine(query_vec, question_vec):
        """
        Cosine similarity of two vector expressions.

        Dot products are formed as ``transpose(a) * b``; the result is the
        dot product of the inputs divided by the product of their norms.
        """
        query_t = dy.transpose(query_vec)
        question_t = dy.transpose(question_vec)

        dot = query_t * question_vec
        query_norm = dy.sqrt(query_t * query_vec)
        question_norm = dy.sqrt(question_t * question_vec)

        return dy.cdiv(dot, query_norm * question_norm)
Пример #12
0
 def on_calc_additional_loss(self, reward):
     """
     Build the baseline and REINFORCE losses for the segmentation policy.

     The reward (optionally augmented by the weighted segment length prior)
     is z-normalised over the batch. If a baseline is used, its squared
     distance to the reward is added as the "Baseline" loss. If the current
     lambda is positive, the "Reinforce" loss is added from the log
     probabilities of the sampled segmentation decisions.

     :param reward: batched reward expression.
     :return: ``None`` when segmentation is not being learned, otherwise a
         ``LossBuilder`` holding the losses described above.
     """
     if not self.learn_segmentation:
         return None
     ret = LossBuilder()
     if self.length_prior_alpha > 0:
         reward += self.segment_length_prior * self.length_prior_alpha
     # z-score normalisation. The small constant keeps the division finite
     # when every reward in the batch is identical (e.g. batch size 1), where
     # the standard deviation is 0 and the result would otherwise be NaN.
     reward = dy.cdiv(reward - dy.mean_batches(reward),
                      dy.std_batches(reward) + 1e-8)
     # Baseline Loss
     if self.use_baseline:
         baseline_loss = [dy.squared_distance(reward, baseline)
                          for baseline in self.bs]
         ret.add_loss("Baseline", dy.esum(baseline_loss))
     # Reinforce Loss
     lmbd = self.lmbd.get_value(self.warmup_counter)
     if lmbd > 0.0:
         reinforce_loss = []
         for i, decision in enumerate(self.segment_decisions):
             ll = dy.pick_batch(self.segment_logsoftmaxes[i], decision)
             # Subtract the per-step baseline from the reward when available.
             r_i = reward - self.bs[i] if self.use_baseline else reward
             reinforce_loss.append(dy.logistic(r_i) * ll)
         ret.add_loss("Reinforce", -dy.esum(reinforce_loss) * lmbd)
     # Total Loss
     return ret
Пример #13
0
 def conditional2(self, joint, prior1):
     """
     Divide each slice of a joint distribution by the matching prior entry.

     For every index ``z`` along the first dimension of ``joint``, the slice
     ``pick(joint, z)`` is divided element-wise by ``prior1[z]`` repeated
     ``size2`` times.

     :param joint: expression whose ``dim()[0]`` is ``(size1, size2)``.
     :param prior1: indexable collection with at least ``size1`` entries.
     :return: list of ``size1`` conditional expressions.
     """
     size1, size2 = joint.dim()[0]
     conditional = []
     # `range` instead of `xrange`: the latter does not exist on Python 3.
     for z in range(size1):
         z_copied = dy.concatenate([prior1[z] for _ in range(size2)])
         conditional.append(dy.cdiv(dy.pick(joint, z), z_copied))
     return conditional
Пример #14
0
    def pz(self, eq):
        """
        Gumbel softmax on distribution over z.

        Computes ``softmax((log pi + g) / temp)`` where ``pi = softmax(W * eq)``
        and ``g`` is Gumbel noise with one entry per cluster.

        :param eq: input vector expression multiplied by ``self.W``.
        :return: relaxed sample over the ``self.num_clusters`` clusters
            (a probability vector, despite the local name ``logits``).
        """
        W = dy.parameter(self.W)
        # The Gumbel noise has to perturb *log*-probabilities; adding it to
        # the probabilities themselves does not follow the Gumbel-softmax
        # formula. log_softmax avoids taking log(0) explicitly.
        log_prob = dy.log_softmax(W * eq)
        gumbel = dy.random_gumbel(self.num_clusters)

        logits = dy.softmax(
            dy.cdiv(dy.esum([log_prob, gumbel]), dy.inputVector([self.temp])))

        return logits
Пример #15
0
def eval_dict_dataset(dataset, net, shortlist, proj, parsed):
    """
    Rank-based evaluation of a definition encoder on a dictionary dataset.

    Each definition is encoded by ``net``, projected with ``proj`` and
    L2-normalised. The rank of a target word is the number of shortlist
    entries whose dot product with the output exceeds that of the target.

    :param dataset: indexable sequence of batches; ``(words, definitions)``
        tuples, or ``(words, definitions, extra)`` when ``parsed`` is true.
    :param net: callable encoder (``do_parse_tree`` is used when ``parsed``).
    :param shortlist: matrix of candidate word vectors.
    :param proj: projection parameter.
    :param parsed: whether the dataset holds single parsed items.
    :return: ``(median rank, accuracy@10, accuracy@100)``.
    """
    first_words = dataset[0][0]
    if parsed:
        dim = first_words.shape[0]
        batch_size = 1
    else:
        dim, batch_size = first_words.shape

    ranks = []
    for data in dataset:
        if parsed:
            words, definitions, _ = data
        else:
            words, definitions = data
        words = np.reshape(np.transpose(words), (batch_size, dim))

        dy.renew_cg()
        P = dy.parameter(proj)
        if parsed:
            outputs = net.do_parse_tree(definitions)
        else:
            outputs, _ = net(definitions)
        projected = P * outputs

        # Scale by 1 / ||projected|| to obtain unit-length outputs.
        inverse_norm = dy.cdiv(dy.inputTensor([1]), dy.sqrt(dy.squared_norm(projected)))
        unit_outputs = (projected * inverse_norm).npvalue()
        unit_outputs = np.reshape(np.transpose(unit_outputs), (batch_size, dim))

        for output, word in zip(unit_outputs, words):
            target_similarity = np.dot(word, output)
            better = np.dot(shortlist, output) > target_similarity
            ranks.append(better.sum())

    total = len(ranks)
    hits_at_10 = sum(int(r <= 10) for r in ranks)
    accuracy10 = float(hits_at_10)/total
    hits_at_100 = sum(int(r <= 100) for r in ranks)
    accuracy100 = float(hits_at_100)/total
    return np.median(ranks), accuracy10, accuracy100
Пример #16
0
 def cell_expr(self):
   """Returns:
        dy.Expression: cell state; when none was stored it is computed (and cached) as the
        inverse tanh of the main expression, 0.5 * log((1 + h) / (1 - h)).
   """
   if self._cell_expr is not None:
     return self._cell_expr
   main = self._main_expr
   ratio = dy.cdiv(1.+main, 1.-main)
   self._cell_expr = 0.5 * dy.log(ratio)
   return self._cell_expr
Пример #17
0
  def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
    """
    Encode the input with three padded conv+ReLU layers and return an
    L2-normalised fixed-size vector.

    The first two layers are followed by max pooling with kernel (1, 4) and
    stride (1, 2); the third is reduced with a max over dimension 1.

    Args:
      src: input sequence, converted to a tensor (the original notes give
        ((40, 1000), 128) as an example dimension).
    Returns:
      An ExpressionSequence wrapping a vector of size ``self.num_filters[2]``
      per batch element.
    """
    src = src.as_tensor()
    batch_size = src.dim()[1]

    def conv_relu(inp, filters, stride):
      # Valid 2D convolution with equal stride in both directions, then ReLU.
      return dy.rectify(dy.conv2d(inp, dy.parameter(filters), stride = [stride, stride], is_valid = True))

    def pool(inp):
      return dy.maxpooling2d(inp, (1, 4), (1,2), is_valid = True)

    # layer 1 -- e.g. ((1, 1000, 64), 128) -> pooled ((1, 499, 64), 128)
    padded = padding(src, self.filter_width[0]+3)
    pool1 = pool(conv_relu(padded, self.filters1, self.stride[0]))

    # layer 2 -- e.g. ((1, 499, 512), 128) -> pooled ((1, 248, 512), 128)
    padded = padding(pool1, self.filter_width[1]+3)
    pool2 = pool(conv_relu(padded, self.filters2, self.stride[1]))

    # layer 3 -- e.g. ((1, 248, 1024), 128), then max over dimension 1
    padded = padding(pool2, self.filter_width[2])
    pool3 = dy.max_dim(conv_relu(padded, self.filters3, self.stride[2]), d = 1)

    # Normalise; the small constant avoids dividing by a zero norm.
    my_norm = dy.l2_norm(pool3) + 1e-6
    output = dy.reshape(dy.cdiv(pool3,my_norm), (self.num_filters[2],), batch_size = batch_size)

    return ExpressionSequence(expr_tensor=output)
Пример #18
0
 def attention(self, Q, K, V):
     """
     Scaled dot-product attention.

     Scores ``Q * transpose(K)`` are divided by ``sqrt(self.d_k)``, passed
     through a softmax along dimension 1 and used to weight ``V``.

     :return: ``softmax(Q K^T / sqrt(d_k)) * V``.
     """
     scale = dy.scalarInput(np.sqrt(self.d_k))
     scores = dy.cdiv(Q * dy.transpose(K), scale)
     weights = dy.softmax(scores, d=1)
     return weights * V
Пример #19
0
 def _attend(self, query, mask=None):
     """
     Compute scaled dot-product attention weights of ``query`` over
     ``self.context``.

     :param query: query expression, ((H), B) per the original notes.
     :param mask: optional pair; scores are multiplied by ``mask[0]`` and
         ``mask[1] * -1e9`` is added before the softmax.
     :return: attention weights, ((T, 1), B) per the original notes.
     """
     row_query = unsqueeze(query, 0)
     # ((1, H), B) * ((H, T), B) -> ((1, T), B) -> ((T, 1), B)
     raw_scores = dy.transpose(row_query * self.context)
     scores = dy.cdiv(raw_scores, dy.scalarInput(self.scale))
     if mask is None:
         return dy.softmax(scores)
     kept = dy.cmult(scores, mask[0])
     penalty = mask[1] * dy.scalarInput(-1e9)
     return dy.softmax(kept + penalty)  # ((T, 1), B)
Пример #20
0
  def generate(self, src, forced_trg_ids):
    """
    Generate the output sentence for a single-item batch.

    Depending on ``self.mode`` the encoder states are averaged, the final
    state is taken, or per-step linear outputs are summed, and a logistic is
    applied. With ``generate_per_step`` one action is emitted per encoder
    step (argmax); otherwise every output whose score exceeds 0.5 is emitted.

    Args:
      src: batch containing exactly one source sentence.
      forced_trg_ids: must be empty/None; forced decoding is not supported.
    Returns:
      A list with a single ``sent.SimpleSentence``.
    Raises:
      ValueError: if ``self.mode`` is not one of the known modes.
    """
    assert not forced_trg_ids
    assert batchers.is_batched(src) and src.batch_size()==1, "batched generation not fully implemented"
    src = src[0]
    event_trigger.start_sent(src)
    embeddings = self.src_embedder.embed_sent(src)
    encodings = self.encoder.transduce(embeddings)
    per_step = self.generate_per_step

    if self.mode in ["avg_mlp", "final_mlp"]:
      if per_step:
        assert self.mode == "avg_mlp", "final_mlp not supported with generate_per_step=True"
        scores = [dy.logistic(self.output_layer.transform(enc_i)) for enc_i in encodings]
      else:
        if self.mode == "avg_mlp":
          # mean over the time dimension
          encoding_fixed_size = dy.sum_dim(encodings.as_tensor(), [1]) * (1.0 / encodings.dim()[0][1])
        else:
          encoding_fixed_size = self.encoder.get_final_states()[-1].main_expr()
        scores = dy.logistic(self.output_layer.transform(encoding_fixed_size))
    elif self.mode == "lin_sum_sig":
      mask = encodings.mask
      enc_lin = []
      for step_i, enc_i in enumerate(encodings):
        step_linear = self.output_layer.transform(enc_i)
        # zero out the contribution of masked batch elements at this step
        if mask and np.sum(mask.np_arr[:, step_i]) > 0:
          step_linear = dy.cmult(step_linear, dy.inputTensor(1.0 - mask.np_arr[:, step_i], batched=True))
        enc_lin.append(step_linear)
      if per_step:
        scores = [dy.logistic(enc_i) for enc_i in enc_lin]
      else:
        if mask:
          # average over the unmasked steps only
          unmasked_steps = dy.inputTensor(np.sum(1.0 - mask.np_arr, axis=1), batched=True)
          encoding_fixed_size = dy.cdiv(dy.esum(enc_lin), unmasked_steps)
        else:
          encoding_fixed_size = dy.esum(enc_lin) / encodings.dim()[0][1]
        scores = dy.logistic(encoding_fixed_size)
    else:
      raise ValueError(f"unknown mode '{self.mode}'")

    if per_step:
      output_actions = [np.argmax(score_i.npvalue()) for score_i in scores]
      score = np.sum([np.max(score_i.npvalue()) for score_i in scores])
    else:
      scores_arr = scores.npvalue()
      selected = scores_arr > 0.5
      output_actions = list(np.nonzero(selected)[0])
      score = np.sum(scores_arr[selected])

    return [sent.SimpleSentence(words=output_actions,
                                idx=src.idx,
                                vocab=getattr(self.trg_reader, "vocab", None),
                                score=score,
                                output_procs=self.trg_reader.output_procs)]
Пример #21
0
 def cal_context(self, s, selected=None):
     """
     Compute the attention context for state ``s``.

     :param s: state passed to ``self.cal_scores``.
     :param selected: optional row indices; when given, only these scores are
         used (renormalised to sum to one) to weight the matching columns.
     :return: tuple ``(context, scores)`` where ``scores`` are the full,
         unrestricted scores.
     """
     ws = self.cal_scores(s)
     if selected is not None:
         picked = dy.select_rows(ws, selected)
         renormalised = dy.cdiv(picked, dy.sum_elems(picked))
         # NOTE(review): `es` is not defined inside this method; presumably a
         # module-level name or meant to be an attribute -- confirm.
         columns = dy.concatenate_cols([es[index] for index in selected])
         return columns * renormalised, ws
     return self.es_matrix * ws, ws
Пример #22
0
  def on_calc_additional_loss(self, translator_loss):
    """
    Build the additional losses used to train the segmentation policy.

    The reward is the negated (optionally exponentiated) ``"mle"`` translator
    loss, detached from backprop, optionally augmented by the length prior
    and z-normalised. From it the "Baseline", "Reinforce" and
    "Confidence Penalty" losses are added as configured.

    Args:
      translator_loss: mapping containing the batched ``"mle"`` loss.
    Returns:
      ``None`` when segmentation is not learned or no decisions were made,
      otherwise a ``LossBuilder`` holding the losses described above.
    """
    if not self.learn_segmentation or self.segment_decisions is None:
      return None
    reward = -translator_loss["mle"]
    if not self.log_reward:
      reward = dy.exp(reward)
    reward = dy.nobackprop(reward)

    # Make sure that reward is not scalar, but rather based on the each batch item
    assert reward.dim()[1] == len(self.src_sent)
    # Mask
    enc_mask = self.enc_mask.get_active_one_mask().transpose() if self.enc_mask is not None else None
    # Compose the loss
    ret = LossBuilder()
    ## Length prior
    alpha = self.length_prior_alpha.value() if self.length_prior_alpha is not None else 0
    if alpha > 0:
      reward += self.segment_length_prior * alpha
    # reward z-score normalization
    if self.z_normalization:
      reward = dy.cdiv(reward-dy.mean_batches(reward), dy.std_batches(reward) + EPS)
    ## Baseline Loss
    last_baseline_idx = None
    if self.use_baseline:
      baseline_loss = []
      for i, baseline in enumerate(self.bs):
        loss = dy.squared_distance(reward, baseline)
        if enc_mask is not None:
          loss = dy.cmult(dy.inputTensor(enc_mask[i], batched=True), loss)
        baseline_loss.append(loss)
        last_baseline_idx = i

      ret.add_loss("Baseline", dy.esum(baseline_loss))

    if self.print_sample:
      # Previously this relied on the loop variable leaking out of the
      # baseline loop and raised NameError when no baseline was used. Keep
      # printing the same step when the loop ran, else fall back to the last one.
      if last_baseline_idx is not None:
        sample_idx = last_baseline_idx
      else:
        sample_idx = len(self.segment_logsoftmaxes) - 1
      if sample_idx >= 0:
        print(dy.exp(self.segment_logsoftmaxes[sample_idx]).npvalue().transpose()[0])
    ## Reinforce Loss
    lmbd = self.lmbd.value()
    if lmbd > 0.0:
      reinforce_loss = []
      # Calculating the loss of the baseline and reinforce
      for i in range(len(self.segment_decisions)):
        ll = dy.pick_batch(self.segment_logsoftmaxes[i], self.segment_decisions[i])
        if self.use_baseline:
          # the baseline must not receive gradients through the policy loss
          r_i = reward - dy.nobackprop(self.bs[i])
        else:
          r_i = reward
        if enc_mask is not None:
          ll = dy.cmult(dy.inputTensor(enc_mask[i], batched=True), ll)
        reinforce_loss.append(r_i * -ll)
      loss = dy.esum(reinforce_loss) * lmbd
      ret.add_loss("Reinforce", loss)
    if self.confidence_penalty:
      ls_loss = self.confidence_penalty(self.segment_logsoftmaxes, enc_mask)
      ret.add_loss("Confidence Penalty", ls_loss)
    # Total Loss
    return ret
def softmax(x):
    """
    Numerically stable softmax built from DyNet operations.

    Subtracts the maximum along dimension 1, exponentiates, and divides by
    the column-summed exponentials broadcast back to the shape of ``x``.

    :param x: DyNet matrix expression.
    :return: expression of the same shape as ``x``.
    """
    shifted = dy.colwise_add(x, -dy.max_dim(x, 1))
    exponentials = dy.exp(shifted)
    totals = dy.sum_cols(exponentials)
    # Broadcast the totals over all columns via a zero matrix.
    denominator = dy.colwise_add(dy.zeroes(x.dim()[0]), totals)
    return dy.cdiv(exponentials, denominator)
Пример #24
0
def selu(x):
    """ Scaled exponential linear unit built from rectifiers.

        For x > 0 the result is ``x * scale``; for x < 0 it is
        ``(alpha * exp(x) - alpha) * scale``.

        :type x: dn.Expression
        :rtype: dn.Expression """
    pos_part = dn.rectify(x)
    neg_part = -dn.rectify(-x)
    # ~1 where x > 0 and 0 elsewhere (epsilon avoids 0 / 0)
    is_positive = dn.rectify(dn.cdiv(pos_part, pos_part + epsilon))
    # exp(neg_part) equals 1 where x > 0; the indicator cancels it there
    exp_term = dn.exp(neg_part) - is_positive
    neg_branch = exp_term * alpha - alpha + is_positive * alpha
    return (pos_part + neg_branch) * scale
Пример #25
0
 def _attend(self, query, mask=None):
     """
     Attention weights of ``query`` over ``self.context``.

     The dot-product scores are divided by ``self.scale``. When a mask pair
     is supplied, scores are multiplied by its first element and
     ``-1e9`` times its second element is added before the softmax.

     :param query: ((H), B) per the original notes.
     :param mask: optional pair of ((T, 1), B) expressions.
     :return: softmax-normalised scores, ((T, 1), B).
     """
     expanded = unsqueeze(query, 0)
     # ((1, H), B) * ((H, T), B) -> ((1, T), B) -> ((T, 1), B)
     attn_scores = dy.cdiv(dy.transpose(expanded * self.context),
                           dy.scalarInput(self.scale))
     if mask is not None:
         keep_part = dy.cmult(attn_scores, mask[0])
         block_part = mask[1] * dy.scalarInput(-1e9)
         attn_scores = keep_part + block_part
     return dy.softmax(attn_scores)
Пример #26
0
  def calc_nll(self, src, trg):
    """
    Compute the binary log loss between predicted scores and the set of
    words occurring in each target.

    The encoder states are reduced to one vector per batch element according
    to ``self.mode`` (masked average, final state, or masked sum of per-step
    linear outputs) and squashed with a logistic. The target is a sparse 0/1
    tensor with a one for every distinct target word except ``ES`` and ``SS``.

    Args:
      src: source sentence(s).
      trg: target sentence or batch of target sentences.
    Returns:
      The loss expression.
    Raises:
      ValueError: if ``self.mode`` is unknown.
    """
    event_trigger.start_sent(src)
    embeddings = self.src_embedder.embed_sent(src)
    encodings = self.encoder.transduce(embeddings)
    if not batchers.is_batched(trg):
      trg = batchers.mark_as_batch([trg])

    def unmasked_step_counts():
      # number of unmasked time steps per batch element
      return dy.inputTensor(np.sum(1.0 - encodings.mask.np_arr, axis=1), batched=True)

    if self.mode in ["avg_mlp", "final_mlp"]:
      if self.mode=="avg_mlp":
        if encodings.mask:
          encoding_fixed_size = dy.cdiv(dy.sum_dim(encodings.as_tensor(), [1]), unmasked_step_counts())
        else:
          encoding_fixed_size = dy.sum_dim(encodings.as_tensor(), [1]) / encodings.dim()[0][1]
      else:
        encoding_fixed_size = self.encoder.get_final_states()[-1].main_expr()
      scores = dy.logistic(self.output_layer.transform(encoding_fixed_size))
    elif self.mode=="lin_sum_sig":
      enc_lin = []
      for step_i, enc_i in enumerate(encodings):
        step_linear = self.output_layer.transform(enc_i)
        if encodings.mask and np.sum(encodings.mask.np_arr[:,step_i])>0:
          keep = dy.inputTensor(1.0 - encodings.mask.np_arr[:,step_i], batched=True)
          step_linear = dy.cmult(step_linear, keep)
        enc_lin.append(step_linear)
      if encodings.mask:
        encoding_fixed_size = dy.cdiv(dy.esum(enc_lin), unmasked_step_counts())
      else:
        encoding_fixed_size = dy.esum(enc_lin) / encodings.dim()[0][1]
      scores = dy.logistic(encoding_fixed_size)
    else:
      raise ValueError(f"unknown mode '{self.mode}'")

    # Coordinates (word id, batch index) of the ones in the sparse target.
    word_ids, batch_ids = [], []
    for batch_i in range(trg.batch_size()):
      for word in set(trg[batch_i]):
        if word in {vocabs.Vocab.ES, vocabs.Vocab.SS}:
          continue
        word_ids.append(word)
        batch_ids.append(batch_i)
    target_shape = scores.dim()[0] + (scores.dim()[1],)
    trg_scores = dy.sparse_inputTensor((word_ids, batch_ids), values = np.ones(len(word_ids)), shape=target_shape, batched=True, )
    return dy.binary_log_loss(scores, trg_scores)
Пример #27
0
    def papx(self, example, turn_idx, state):
        """
        Calculate the cost of utterance and action given z.

        The action cost is the KL divergence between the softmax of
        ``self.pa(state)`` and the softmax of the classifier's logits; the
        text cost is the summed negative log-softmax of the goal utterance
        (followed by ``<END>``) under ``self.px``.

        :param example: pair ``(encoder_input, labels)`` where ``labels[0]``
            holds the dialogue turns and ``labels[1]`` the agreement.
        :param turn_idx: index of the turn to score.
        :param state: latent state passed to ``self.pa`` / ``self.px``.
        :return: expression summing the action divergence and the text loss.
        """
        encoder_input, labels = example[0], example[1]
        text = labels[0]
        goal_utterance = text[turn_idx]

        pa = self.pa(state)
        px = self.px(state, goal_utterance)

        # --- action probability from the classifier ---
        prev_text = text[:turn_idx]
        if prev_text == []:
            prev_text = [[self.vocab.index("<PAD>")]]

        agreement = labels[1]
        # encoder_input[0] is the agreement space, of form [1, 4, 4]
        cdata = [encoder_input[0], prev_text, agreement]

        self.classifier.predict_example(cdata)
        logits, _ = self.classifier.get_logits(cdata)

        # Index of the gold agreement (last match); not used further below.
        label_idx = -999
        for idx, candidate in enumerate(self.classifier.agreement_space):
            if agreement == candidate:
                label_idx = idx

        action_prob = dy.softmax(logits)
        pa = dy.softmax(pa)

        # KL(pa || action_prob) = sum pa * log(pa / action_prob)
        log_ratio = dy.log(dy.cdiv(pa, action_prob))
        action_diverge = dy.sum_elems(dy.cmult(pa, log_ratio))

        # --- text probability ---
        decoder_target = goal_utterance + [self.vocab.index("<END>")]
        losses = [dy.pickneglogsoftmax(log_prob, target)
                  for log_prob, target in zip(px, decoder_target)]
        text_loss = dy.esum(losses)

        return dy.esum([action_diverge, text_loss])
Пример #28
0
 def embed(self, x: Union[batchers.Batch,
                          numbers.Integral]) -> dy.Expression:
     """
 Embed a single word in a sentence.

 The raw embedding comes from ``self._embed_word``; it is rescaled to the
 fixed norm when ``fix_norm`` is set, and weight noise is added in training.

 :param x: A word id.
 :return: Embedded word.
 """
     embedded = self._embed_word(x, batchers.is_batched(x))
     if self.fix_norm is not None:
         unit = dy.cdiv(embedded, dy.l2_norm(embedded))
         embedded = unit * self.fix_norm
     add_noise = self.train and self.weight_noise > 0.0
     if add_noise:
         embedded = dy.noise(embedded, self.weight_noise)
     return embedded
Пример #29
0
  def transduce(self, src: ExpressionSequence) -> ExpressionSequence:
    """
    Project the input with an affine layer and L2-normalise the result.

    The input tensor is reshaped to a single column of height
    ``src.dim()[0][0]`` per batch element before ``W * x + b`` is applied.

    Args:
      src: input sequence, converted to a tensor.
    Returns:
      An ExpressionSequence wrapping the normalised projection.
    """
    tensor = src.as_tensor()
    shape = tensor.dim()
    height, batch_size = shape[0][0], shape[1]

    W = dy.parameter(self.pW)
    b = dy.parameter(self.pb)

    column = dy.reshape(tensor, (height, 1), batch_size=batch_size)
    projected = (W*column)+b
    norm = dy.sqrt(dy.squared_norm(projected))
    return ExpressionSequence(expr_tensor=dy.cdiv(projected,norm))
Пример #30
0
    def __call__(self, x, soft_labels=False, temperature=None):
        """
        Apply the (optional) hidden MLP layer and the output layer to ``x``.

        :param x: input expression.
        :param soft_labels: request temperature-smoothed soft labels.
        :param temperature: smoothing temperature; soft labels are only
            produced when this is truthy as well.
        :return: ``exp(logits / T) / sum(exp(logits / T))`` when soft labels
            are requested, otherwise ``self.act(logits)``.
        """
        x_in = x
        if self.mlp:
            W_mlp = dynet.parameter(self.W_mlp)
            b_mlp = dynet.parameter(self.b_mlp)
            x_in = self.mlp_activation(W_mlp * x + b_mlp)

        # from params to expressions
        W = dynet.parameter(self.W)
        b = dynet.parameter(self.b)
        logits = W*x_in + b

        if not (soft_labels and temperature):
            return self.act(logits)

        # soft labels smoothed with the temperature,
        # see Distilling the Knowledge in a Neural Network
        elems = dynet.exp(logits / temperature)
        return dynet.cdiv(elems, dynet.sum_elems(elems))
def show_attention_weights(model, sentence):
    """
    Return the attention weight assigned to every encoded position.

    :param model: model exposing ``wlookup``, ``w2i``, ``sentence_rnn`` and
        the attention parameters.
    :param sentence: object with ``preprocessed_sentence``.
    :return: list of floats (one per encoded position), or ``None`` when the
        sentence or its encoding is empty.
    """
    seq = [model.wlookup[int(model.w2i.get(token, 0))]
           for token in sentence.preprocessed_sentence]
    if not seq:
        return None

    encoded_sequence = encode_sequence(model, seq, model.sentence_rnn)
    if len(encoded_sequence) == 0:
        return None

    # Unnormalised weight per position: exp(<W e + b, context>).
    projected = [(model.attention_w * e) + model.attention_b
                 for e in encoded_sequence]
    exp_scores = [dy.exp(dy.sum_elems(dy.cmult(p, model.att_context)))
                  for p in projected]

    normaliser = dy.esum(exp_scores)
    return [dy.cdiv(score, normaliser).scalar_value() for score in exp_scores]