def testMulZero(self):
    prod_ = LogTensor(V(self.x)) * 0
    res_sb = prod_.torch()
    res_th = -np.inf * np.ones(res_sb.size())
    assert_all_close(res_th, res_sb)

def testMulNonZero(self):
    prod_ = LogTensor(V(self.x)) * self.nonzero_const
    res_sb = prod_.torch()
    res_th = self.x.double() + math.log(self.nonzero_const)
    assert_all_close(res_th, res_sb)

def testMulTensors(self):
    prod_ = LogTensor(V(self.x)) * LogTensor(V(self.y))
    res_sb = prod_.torch()
    res_th = self.x.double() + self.y.double()
    assert_all_close(res_th, res_sb)

def testSumZero(self):
    sum_ = LogTensor(V(self.x)) + 0
    res_sb = sum_.torch()
    res_th = self.x
    assert_all_close(res_th, res_sb)

def testSumNonZero(self):
    sum_ = LogTensor(V(self.x)) + self.nonzero_const
    res_sb = sum_.torch()
    res_th = torch.log(torch.exp(self.x.double()) + self.nonzero_const)
    assert_all_close(res_th, res_sb)

def testSumTensors(self):
    sum_ = LogTensor(V(self.x)) + LogTensor(V(self.y))
    res_sb = sum_.torch()
    res_th = torch.log(torch.exp(self.x.double()) + torch.exp(self.y.double()))
    assert_all_close(res_th, res_sb)
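# The six tests above pin down the LogTensor arithmetic: values are stored in
# log space, so `*` corresponds to adding logs and `+` to a log-sum-exp. A
# minimal standalone sketch of the two identities with plain torch ops
# (illustrative only; it does not exercise LogTensor itself):
def log_space_identities_sketch():
    a = torch.randn(5).double()
    b = torch.randn(5).double()
    # product in log space: log(exp(a) * exp(b)) == a + b
    assert_all_close(a + b, torch.log(torch.exp(a) * torch.exp(b)))
    # sum in log space, computed stably: log(exp(a) + exp(b))
    m = torch.max(a, b)
    assert_all_close(m + torch.log(torch.exp(a - m) + torch.exp(b - m)),
                     torch.log(torch.exp(a) + torch.exp(b)))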
def mul_function(x1, x2):
    # prepare indices for convolution
    l1, l2 = len(x1), len(x2)
    M = min(k + 1, l1 + l2 - 1)
    indices = [[] for _ in range(M)]
    for (i, j) in itertools.product(range(l1), range(l2)):
        if i + j >= M:
            continue
        indices[i + j].append((i, j))

    # wrap with log-tensors for stability
    X1 = [LogTensor(x1[i]) for i in range(l1)]
    X2 = [LogTensor(x2[i]) for i in range(l2)]

    # perform convolution
    coeff = []
    for c in range(M):
        coeff.append(isum(X1[i] * X2[j] for (i, j) in indices[c]).torch())
    return coeff
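# Hypothetical usage of mul_function (the names log_a*, log_b* are
# illustrative): x1 and x2 hold the log-coefficients of two polynomials, one
# tensor per degree, and the result holds the log-coefficients of their
# product, truncated at degree k (with k, isum and LogTensor taken from the
# enclosing scope). For degree-1 inputs:
#
#   coeff = mul_function([log_a0, log_a1], [log_b0, log_b1])
#   # coeff[0] == log(a0 * b0)
#   # coeff[1] == log(a0 * b1 + a1 * b0)
#   # coeff[2] == log(a1 * b1)    (present only if k + 1 > 2)
#
# i.e. an ordinary polynomial convolution, evaluated entirely in log space
# for numerical stability.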
def fun(x, y):
    x_1, x_2 = split(x, y, labels)
    # all scores are divided by (k * tau)
    x_1.div_(k * tau)
    x_2.div_(k * tau)

    # term 1: all terms that will *not* include the ground truth score
    # term 2: all terms that will include the ground truth score
    res = lsp(x_1)
    term_1, term_2 = res[1], res[0]
    term_1, term_2 = LogTensor(term_1), LogTensor(term_2)
    X_2 = LogTensor(x_2)

    cst = x_2.data.new(1).fill_(float(alpha) / tau)
    One_by_tau = LogTensor(ag.Variable(cst, requires_grad=False))
    Loss_ = term_2 * X_2

    loss_pos = (term_1 * One_by_tau + Loss_).torch()
    loss_neg = Loss_.torch()
    loss = tau * (loss_pos - loss_neg)
    return loss
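# Reading the code above as a formula (assuming lsp returns the logs of the
# elementary symmetric polynomials sigma_{k-1} and sigma_k of the rescaled
# non-ground-truth scores): with s_1 = x_1 / (k * tau) and
# s_2 = x_2 / (k * tau),
#
#   loss = tau * [ log( exp(alpha / tau) * sigma_k(exp(s_1))
#                       + sigma_{k-1}(exp(s_1)) * exp(s_2) )
#                  - log( sigma_{k-1}(exp(s_1)) * exp(s_2) ) ]
#
# Note that LogTensor stores log-values, so wrapping cst = alpha / tau makes
# One_by_tau represent the multiplicative factor exp(alpha / tau). All
# products and sums inside the logs are evaluated in log space, which keeps
# the expression stable when tau is small.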
def d_logS_d_expX(S, X, j, p, grad, thresh, eps=1e-5):
    """
    Compute the gradient of log S[j] w.r.t. exp(X).
    For unstable cases, use p-th order approximation.
    """
    # ------------------------------------------------------------------------
    # Detect instabilities
    # ------------------------------------------------------------------------

    _X_ = LogTensor(X)
    _S_ = [LogTensor(S[i]) for i in range(S.size(0))]

    # recursion of gradient formula (separate terms for stability)
    _N_, _P_ = recursion(_S_, _X_, j)

    # detect instability: small relative difference in log-space
    P, N = _P_.torch(), _N_.torch()
    diff = (P - N) / (N.abs() + eps)

    # split into stable and unstable indices
    u_indices = torch.lt(diff, thresh)  # unstable
    s_indices = u_indices.eq(0)  # stable

    # ------------------------------------------------------------------------
    # Compute d S[j] / d X
    # ------------------------------------------------------------------------

    # make grad match size and type of X
    grad = grad.type_as(X).resize_as_(X)

    # exact gradient for s_indices (stable) elements
    if s_indices.sum():
        # re-use positive and negative parts of recursion
        # (kept separate for stability)
        _N_ = LogTensor(_N_.torch()[s_indices])
        _P_ = LogTensor(_P_.torch()[s_indices])
        _X_ = LogTensor(X[s_indices])
        _S_ = [LogTensor(S[i][s_indices]) for i in range(S.size(0))]

        # d log S[j] / d exp(X) = (d S[j] / d X) * X / S[j]
        _SG_ = (_P_ - _N_) * _X_ / _S_[j]
        grad.masked_scatter_(s_indices, _SG_.torch().exp())

    # approximate gradients for u_indices (unstable) elements
    if u_indices.sum():
        _X_ = LogTensor(X[u_indices])
        _S_ = [LogTensor(S[i][u_indices]) for i in range(S.size(0))]

        # positive and negative parts of approximation
        # (kept separate for stability)
        _N_, _P_ = approximation(_S_, _X_, j, p)

        # d log S[j] / d exp(X) = (d S[j] / d X) * X / S[j]
        _UG_ = (_P_ - _N_) * _X_ / _S_[j]
        grad.masked_scatter_(u_indices, _UG_.torch().exp())

    return grad
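# Why the stable/unstable split matters (an illustration, not library code):
# the exact gradient is recovered from log-space quantities as
# log(exp(P) - exp(N)), which cancels catastrophically when P and N are
# nearly equal, and degenerates to log(0) = -inf when P == N. Elements whose
# relative gap (P - N) / (|N| + eps) falls below `thresh` are therefore
# routed to the p-th order `approximation` instead of the exact `recursion`.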