def _losses_single(self, score_expr, gold_idxes_expr, single_sample, is_hinge=False, margin=0.):
    # expand the idxes to 0/1
    score_shape = BK.get_shape(score_expr)
    expanded_idxes_expr = BK.constants(score_shape, 0.)
    expanded_idxes_expr = BK.minus_margin(expanded_idxes_expr, gold_idxes_expr, -1.)  # minus -1 means +1
    # todo(+N): first adjust margin, since previously we only subtracted the margin for golds?
    if margin > 0.:
        adjusted_scores = margin + BK.minus_margin(score_expr, gold_idxes_expr, margin)
    else:
        adjusted_scores = score_expr
    # [*, L]
    if is_hinge:
        # multiply positive instances with -1
        flipped_scores = adjusted_scores * (1. - 2 * expanded_idxes_expr)
        losses_all = BK.clamp(flipped_scores, min=0.)
    else:
        losses_all = BK.binary_cross_entropy_with_logits(adjusted_scores, expanded_idxes_expr, reduction='none')
    # special interpretation of single_sample (todo(+2): there can be a better implementation)
    if single_sample < 1.:
        # a sampling rate; todo(warn): lower-bound it to ensure at least 2 samples
        real_sample_rate = max(single_sample, 2. / score_shape[-1])
    elif single_sample >= 2.:
        # an absolute count, including the positive one
        real_sample_rate = max(single_sample, 2.) / score_shape[-1]
    else:
        # [1., 2.)
        real_sample_rate = single_sample
    #
    if real_sample_rate < 1.:
        sample_weight = BK.random_bernoulli(score_shape, real_sample_rate, 1.)
        # make sure the positive instance is always included in the sample
        sample_weight = (sample_weight + expanded_idxes_expr.float()).clamp_(0., 1.)
        #
        final_losses = (losses_all * sample_weight).sum(-1) / sample_weight.sum(-1)
    else:
        final_losses = losses_all.mean(-1)
    return final_losses
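# ---------------------------------------------------------------------------
# A minimal standalone sketch (not part of this class) of the sampled binary
# loss above, written in plain PyTorch for reference. It assumes BK is a thin
# wrapper over torch: BK.minus_margin subtracts a value at the gold indexes
# and BK.random_bernoulli draws 0/1 weights. The name `_demo_losses_single`
# is hypothetical, not part of the original API.
import torch
import torch.nn.functional as F

def _demo_losses_single(scores, gold_idxes, sample_rate):
    # scores: [*, L] raw logits; gold_idxes: [*] long tensor of gold classes
    onehot = torch.zeros_like(scores).scatter_(-1, gold_idxes.unsqueeze(-1), 1.)
    losses_all = F.binary_cross_entropy_with_logits(scores, onehot, reduction='none')
    # sample a subset of the L binary decisions, always keeping the gold one
    sample_weight = torch.bernoulli(torch.full_like(scores, sample_rate))
    sample_weight = (sample_weight + onehot).clamp_(0., 1.)
    return (losses_all * sample_weight).sum(-1) / sample_weight.sum(-1)
# ---------------------------------------------------------------------------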
def _losses_global_prob(self, full_score_expr, gold_heads_expr, gold_labels_expr, marginals_expr, mask_expr):
    # combine the last two dimensions
    full_shape = BK.get_shape(full_score_expr)
    last_size = full_shape[-1]
    # [*, m, h*L]
    combined_marginals_expr = marginals_expr.view(full_shape[:-2] + [-1])
    # # todo(warn): make sure marginals sum to 1.; handled in the algorithm instead
    # combined_marginals_expr = combined_marginals_expr / combined_marginals_expr.sum(dim=-1, keepdim=True)
    # [*, m]
    gold_combined_idx_expr = gold_heads_expr * last_size + gold_labels_expr
    # [*, m, h, L]
    gradients = BK.minus_margin(combined_marginals_expr, gold_combined_idx_expr, 1.).view(full_shape)
    # the gradients on h are already 0. from the marginal algorithm
    gradients_masked = gradients * mask_expr.unsqueeze(-1).unsqueeze(-1) * mask_expr.unsqueeze(-2).unsqueeze(-1)
    # for the h-dimension, we still need to divide by the real length
    # todo(warn): these values should be summed directly rather than averaged, since they come directly from the loss
    fake_losses = (full_score_expr * gradients_masked).sum(-1).sum(-1)  # [BS, m]
    # todo(warn): be aware of search-error-like output constraints;
    # clamping for all cases is not good for loss-prob, so it is dealt with outside via the unproj-mask
    # <bad> fake_losses = BK.clamp(fake_losses, min=0.)
    return fake_losses
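# ---------------------------------------------------------------------------
# Standalone sketch of the "fake loss" trick above, assuming plain torch
# tensors and marginals that come detached from an inference algorithm.
# Summing score * (marginal - gold_onehot) over a constant gradient tensor
# gives d(loss)/d(score) = marginal - gold_onehot under autograd, which is
# exactly the gradient of the global (negative) log-likelihood. The name
# `_demo_global_prob_loss` is illustrative only.
import torch

def _demo_global_prob_loss(scores, marginals, gold_onehot):
    # scores: [*, m, h, L] (requires grad); marginals/gold_onehot: same shape
    grads = (marginals - gold_onehot).detach()  # treat as a constant
    return (scores * grads).sum(-1).sum(-1)  # [*, m]; sum, do not average
# ---------------------------------------------------------------------------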
def _score_label_full(self, scoring_expr_pack, mask_expr, training, margin, gold_heads_expr=None, gold_labels_expr=None):
    _, _, lm_expr, lh_expr = scoring_expr_pack
    # [BS, len-m, len-h, L]
    full_label_score = self.scorer.score_label_all(lm_expr, lh_expr, mask_expr, mask_expr)
    # set diag to small values -- todo(warn): handled specifically in algorithms
    # maxlen = BK.get_shape(full_label_score, 1)
    # full_label_score += BK.diagflat(BK.constants([maxlen], Constants.REAL_PRAC_MIN)).unsqueeze(-1)
    # margin? -- needs special reshaping
    if training and margin > 0.:
        full_shape = BK.get_shape(full_label_score)
        # combine the last two dimensions
        combined_score_expr = full_label_score.view(full_shape[:-2] + [-1])
        combined_idx_expr = gold_heads_expr * full_shape[-1] + gold_labels_expr
        combined_changed_score = BK.minus_margin(combined_score_expr, combined_idx_expr, margin)
        full_label_score = combined_changed_score.view(full_shape)
    return full_label_score
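# ---------------------------------------------------------------------------
# Standalone sketch of the combined-index margin trick used above, assuming
# plain torch tensors: flatten the (head, label) dims, subtract the margin at
# the gold (head, label) cell, and reshape back. Names are illustrative only.
import torch

def _demo_minus_margin_full(label_scores, gold_heads, gold_labels, margin):
    # label_scores: [BS, m, h, L]; gold_heads/gold_labels: [BS, m] long tensors
    bs, m, h, l = label_scores.shape
    flat = label_scores.view(bs, m, h * l)  # combine the last two dims
    gold_idx = (gold_heads * l + gold_labels).unsqueeze(-1)  # [BS, m, 1]
    delta = flat.new_full(gold_idx.shape, -margin)  # -margin at the gold cell
    flat = flat.scatter_add(-1, gold_idx, delta)
    return flat.view(bs, m, h, l)
# ---------------------------------------------------------------------------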
def _score_arc_full(self, scoring_expr_pack, mask_expr, training, margin, gold_heads_expr=None):
    am_expr, ah_expr, _, _ = scoring_expr_pack
    # [BS, len-m, len-h]
    full_arc_score = self.scorer.score_arc_all(am_expr, ah_expr, mask_expr, mask_expr)
    # set diag to small values -- todo(warn): handled specifically in algorithms
    # maxlen = BK.get_shape(full_arc_score, 1)
    # full_arc_score += BK.diagflat(BK.constants([maxlen], Constants.REAL_PRAC_MIN))
    # margin?
    if training and margin > 0.:
        full_arc_score = BK.minus_margin(full_arc_score, gold_heads_expr, margin)
    return full_arc_score
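# ---------------------------------------------------------------------------
# Standalone sketch of the commented-out diagonal masking above, assuming
# plain torch: adding a large negative constant on the diagonal forbids
# self-loops (a token choosing itself as head) when the decoding algorithm
# does not already handle this. NEG_INF stands in for Constants.REAL_PRAC_MIN.
import torch

NEG_INF = -1e30  # hypothetical stand-in for Constants.REAL_PRAC_MIN

def _demo_mask_diag(arc_scores):
    # arc_scores: [BS, len, len] (modifier x head)
    maxlen = arc_scores.size(1)
    diag = torch.diagflat(arc_scores.new_full((maxlen,), NEG_INF))  # [len, len]
    return arc_scores + diag  # broadcasts over the batch dimension
# ---------------------------------------------------------------------------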
def _score_label_selected(self, scoring_expr_pack, mask_expr, training, margin, gold_heads_expr, gold_labels_expr=None):
    _, _, lm_expr, lh_expr = scoring_expr_pack
    # [BS, len-m, D]
    lh_expr_shape = BK.get_shape(lh_expr)
    selected_lh_expr = BK.gather(lh_expr, gold_heads_expr.unsqueeze(-1).expand(*lh_expr_shape),
                                 dim=len(lh_expr_shape) - 2)
    # [BS, len-m, L]
    select_label_score = self.scorer.score_label_select(lm_expr, selected_lh_expr, mask_expr)
    # margin?
    if training and margin > 0.:
        select_label_score = BK.minus_margin(select_label_score, gold_labels_expr, margin)
    return select_label_score
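# ---------------------------------------------------------------------------
# Standalone sketch of the head-selection gather above in plain torch: for
# each modifier token, pick out the representation of its gold head along the
# length dimension before label scoring. Names are illustrative only.
import torch

def _demo_select_gold_heads(lh_expr, gold_heads):
    # lh_expr: [BS, len, D]; gold_heads: [BS, len] long tensor with values in [0, len)
    idx = gold_heads.unsqueeze(-1).expand(-1, -1, lh_expr.size(-1))  # [BS, len, D]
    # out[b, i, :] == lh_expr[b, gold_heads[b, i], :]
    return lh_expr.gather(1, idx)
# ---------------------------------------------------------------------------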