def double_linear_logits(args, size, bias, bias_start=0.0, mask=None, wd=0.0,
                         input_drop_prob=0.0, is_train=None):
    # Two-layer (MLP-style) logit scorer: project the inputs to `size` units
    # through a tanh nonlinearity, then project down to a single score per
    # position; masked-out positions are suppressed via exp_mask.
    first = torch.tanh(linear(args, size, bias, bias_start=bias_start, wd=wd,
                              input_drop_prob=input_drop_prob, is_train=is_train))
    second = linear(first, 1, bias, bias_start=bias_start, squeeze=True, wd=wd,
                    input_drop_prob=input_drop_prob, is_train=is_train)
    if mask is not None:
        second = exp_mask(second, mask)
    return second
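# Illustration (an assumption, not part of this module): double_linear_logits
# behaves like an additive/MLP attention scorer, roughly
# score = w2 . tanh(W1 x + b1) + b2. The standalone sketch below uses plain
# nn.Linear layers and hypothetical shapes instead of this module's `linear`
# helper, so it omits the wd / dropout / is_train handling:
#
#     import torch
#     import torch.nn as nn
#
#     batch, seq_len, hidden, size = 2, 5, 8, 16
#     x = torch.randn(batch, seq_len, hidden)
#     proj = nn.Linear(hidden, size)     # first projection (W1, b1)
#     score = nn.Linear(size, 1)         # second projection down to one logit
#     logits = score(torch.tanh(proj(x))).squeeze(-1)   # (batch, seq_len)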
def softmax(logits, mask=None):
    # Softmax over the last dimension; masked positions are pushed to ~0
    # probability by exp_mask before normalization.
    if mask is not None:
        logits = exp_mask(logits, mask)
    flat_logits = flatten(logits, 1)
    flat_out = F.softmax(flat_logits, dim=-1)
    out = reconstruct(flat_out, logits, 1)
    return out
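# Usage sketch (hypothetical values): exp_mask is assumed to add a very large
# negative number to padded positions before the softmax, so they end up with
# ~0 probability. A minimal standalone version of that idea:
#
#     import torch
#     import torch.nn.functional as F
#
#     logits = torch.tensor([[1.0, 2.0, 3.0, 0.0]])
#     mask = torch.tensor([[1.0, 1.0, 1.0, 0.0]])   # 0 marks padding
#     masked = logits + (1.0 - mask) * -1e30
#     probs = F.softmax(masked, dim=-1)             # last entry is ~0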
def linear_logits(linear_layer, args, bias, bias_start=0.0, mask=None, wd=0.0,
                  input_drop_prob=0.0, is_train=None):
    # Single linear projection to one logit per position, with the trailing
    # size-1 dimension squeezed away; masked positions are suppressed.
    logits = linear(linear_layer, args, 1, bias, bias_start=bias_start, squeeze=True,
                    wd=wd, input_drop_prob=input_drop_prob, is_train=is_train)
    if mask is not None:
        logits = exp_mask(logits, mask)
    return logits
def sum_logits(args, mask=None):
    # Sum each input tensor over its last dimension and add the results,
    # yielding one logit per position.
    rank = len(args[0].size())
    logits = sum(torch.sum(arg, rank - 1) for arg in args)
    if mask is not None:
        logits = exp_mask(logits, mask)
    return logits
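# Usage sketch (hypothetical shapes): two (batch, seq_len, hidden) tensors are
# each summed over their last dimension and then added, giving (batch, seq_len)
# logits. Calling without a mask avoids the exp_mask dependency here:
#
#     a = torch.randn(2, 5, 8)
#     b = torch.randn(2, 5, 8)
#     logits = sum_logits([a, b])   # shape (2, 5)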