def _pairwise_gradients(self, pxs, nxs):
    """Compute margin-ranking gradients for ``self.E`` and ``self.R``.

    Positive and negative examples are scored with ``self._scores`` and passed
    through ``self.af.f``.  Only pairs with ``nscore + margin > pscore``
    contribute to the gradients.

    Args:
        pxs: positive examples, in the form accepted by ``unzip_triples``.
        nxs: negative examples; compared position-by-position with ``pxs``.

    Returns:
        ``None`` when no pair violates the margin, otherwise
        ``{'E': (ge, eidx), 'R': (gr, ridx)}`` where ``eidx`` / ``ridx`` are
        the index arrays returned by ``grad_sum_matrix``.

    Side effects:
        Sets ``self.nviolations`` to the number of violating pairs.
    """
    # indices of positive examples
    sp, pp, op = unzip_triples(pxs)
    # indices of negative examples
    sn, pn, on = unzip_triples(nxs)

    pscores = self.af.f(self._scores(sp, pp, op))
    nscores = self.af.f(self._scores(sn, pn, on))

    # find examples that violate margin
    ind = np.where(nscores + self.margin > pscores)[0]
    self.nviolations = len(ind)
    if len(ind) == 0:
        return None

    # Plain lists, so that ``+`` below concatenates instead of adding.
    sp, sn = list(sp[ind]), list(sn[ind])
    op, on = list(op[ind]), list(on[ind])
    pp, pn = list(pp[ind]), list(pn[ind])

    # Store the per-example factors as a column (np.newaxis) so they
    # broadcast across each row of the embedding-sized terms below.
    gpscores = -self.af.g_given_f(pscores[ind])[:, np.newaxis]
    gnscores = self.af.g_given_f(nscores[ind])[:, np.newaxis]

    # object role gradients
    ridx, Sm, n = grad_sum_matrix(pp + pn)
    grp = gpscores * ccorr(self.E[sp], self.E[op])
    grn = gnscores * ccorr(self.E[sn], self.E[on])
    # Sm.dot(...) sums the terms that share an index; dividing by n turns
    # that sum into the value actually wanted (see the original note: the
    # sum has n non-zero terms).
    gr = Sm.dot(np.vstack((grp, grn))) / n
    gr += self.rparam * self.R[ridx]

    # filler gradients (no regularisation term is added for E here)
    eidx, Sm, n = grad_sum_matrix(sp + sn + op + on)
    geip = gpscores * ccorr(self.R[pp], self.E[op])
    gein = gnscores * ccorr(self.R[pn], self.E[on])
    gejp = gpscores * cconv(self.E[sp], self.R[pp])
    gejn = gnscores * cconv(self.E[sn], self.R[pn])
    ge = Sm.dot(np.vstack((geip, gein, gejp, gejn))) / n

    return {'E': (ge, eidx), 'R': (gr, ridx)}
def _pairwise_gradients(self, pxs, nxs):
    """Return sign-based margin gradients for the violating pairs.

    Args:
        pxs: positive triples, in the form accepted by ``unzip_triples``.
        nxs: negative triples; compared position-by-position with ``pxs``.

    Returns:
        ``None`` when every pair satisfies the margin, otherwise
        ``{'E': (ge, eidx), 'R': (gr, ridx)}``.

    Raises:
        NotImplementedError: if ``self.l1`` is falsy.

    Side effects:
        Sets ``self.nviolations`` to the number of violating pairs.
    """
    pos_s, pos_p, pos_o = unzip_triples(pxs)
    neg_s, neg_p, neg_o = unzip_triples(nxs)

    pos_scores = self._scores(pos_s, pos_p, pos_o)
    neg_scores = self._scores(neg_s, neg_p, neg_o)

    violating = np.where(neg_scores + self.margin > pos_scores)[0]
    self.nviolations = len(violating)
    if not len(violating):
        # all examples in the batch satisfy the margin criterion
        return None

    # Lists (not arrays) so that ``+`` concatenates further down.
    pos_s, neg_s = list(pos_s[violating]), list(neg_s[violating])
    pos_p, neg_p = list(pos_p[violating]), list(neg_p[violating])
    pos_o, neg_o = list(pos_o[violating]), list(neg_o[violating])

    pos_res = self.E[pos_o] - self.R[pos_p] - self.E[pos_s]
    neg_res = self.E[neg_o] - self.R[neg_p] - self.E[neg_s]

    if not self.l1:
        raise NotImplementedError()
    pg = np.sign(np.negative(pos_res))
    ng = np.negative(np.sign(np.negative(neg_res)))

    # entity gradients
    eidx, ent_sum, ent_n = grad_sum_matrix(pos_s + pos_o + neg_s + neg_o)
    ge = ent_sum.dot(np.vstack((pg, -pg, ng, -ng))) / ent_n

    # relation gradients
    ridx, rel_sum, rel_n = grad_sum_matrix(pos_p + neg_p)
    gr = rel_sum.dot(np.vstack((pg, ng))) / rel_n

    if self.updateOffsetE > 0:
        # Keep only the tail starting at the first entity id >= the offset.
        # This slicing assumes eidx is sorted ascending — TODO confirm
        # against grad_sum_matrix.
        hits = np.where(eidx >= self.updateOffsetE)[0]
        if len(hits) > 0:
            first = hits[0]
            ge = ge[first:]
            eidx = eidx[first:]
        else:
            ge = []
            eidx = []
        # NOTE(review): the source lost its indentation; relation gradients
        # are assumed to be dropped whenever updateOffsetE > 0 (not only in
        # the ``else`` branch above) — confirm against the original layout.
        gr = []
        ridx = []

    return {'E': (ge, eidx), 'R': (gr, ridx)}
def _pairwise_gradients(self, pxs, nxs):
    """Compute margin-ranking gradients, optionally limited by an entity offset.

    Args:
        pxs: positive examples, in the form accepted by ``unzip_triples``.
        nxs: negative examples; compared position-by-position with ``pxs``.

    Returns:
        ``None`` when no pair violates the margin, otherwise
        ``{'E': (ge, eidx), 'R': (gr, ridx)}``.  When
        ``self.updateOffsetE > 0`` the entity part is cut down to ids at or
        above that offset and the relation part is returned as empty lists.

    Side effects:
        Sets ``self.nviolations`` to the number of violating pairs.
    """
    pos_s, pos_p, pos_o = unzip_triples(pxs)
    neg_s, neg_p, neg_o = unzip_triples(nxs)

    pos_f = self.af.f(self._scores(pos_s, pos_p, pos_o))
    neg_f = self.af.f(self._scores(neg_s, neg_p, neg_o))

    # pairs that violate the margin
    violating = np.where(neg_f + self.margin > pos_f)[0]
    self.nviolations = len(violating)
    if not len(violating):
        return None

    # Lists (not arrays) so that ``+`` concatenates further down.
    pos_s, neg_s = list(pos_s[violating]), list(neg_s[violating])
    pos_o, neg_o = list(pos_o[violating]), list(neg_o[violating])
    pos_p, neg_p = list(pos_p[violating]), list(neg_p[violating])

    # column vectors, one factor per violating example
    pos_g = -self.af.g_given_f(pos_f[violating])[:, np.newaxis]
    neg_g = self.af.g_given_f(neg_f[violating])[:, np.newaxis]

    # relation gradients, with the rparam-weighted term added after averaging
    ridx, rel_sum, rel_n = grad_sum_matrix(pos_p + neg_p)
    rel_terms = np.vstack((
        pos_g * ccorr(self.E[pos_s], self.E[pos_o]),
        neg_g * ccorr(self.E[neg_s], self.E[neg_o]),
    ))
    gr = rel_sum.dot(rel_terms) / rel_n
    gr += self.rparam * self.R[ridx]

    # entity gradients (no rparam term is added for E)
    eidx, ent_sum, ent_n = grad_sum_matrix(pos_s + neg_s + pos_o + neg_o)
    ent_terms = np.vstack((
        pos_g * ccorr(self.R[pos_p], self.E[pos_o]),
        neg_g * ccorr(self.R[neg_p], self.E[neg_o]),
        pos_g * cconv(self.E[pos_s], self.R[pos_p]),
        neg_g * cconv(self.E[neg_s], self.R[neg_p]),
    ))
    ge = ent_sum.dot(ent_terms) / ent_n

    if self.updateOffsetE > 0:
        # Keep only the tail starting at the first entity id >= the offset.
        # This slicing assumes eidx is sorted ascending — TODO confirm
        # against grad_sum_matrix.
        hits = np.where(eidx >= self.updateOffsetE)[0]
        if len(hits) > 0:
            first = hits[0]
            ge = ge[first:]
            eidx = eidx[first:]
        else:
            ge = []
            eidx = []
        # NOTE(review): the source lost its indentation; relation gradients
        # are assumed to be dropped whenever updateOffsetE > 0 (not only in
        # the ``else`` branch above) — confirm against the original layout.
        gr = []
        ridx = []

    return {'E': (ge, eidx), 'R': (gr, ridx)}
def pairwise_gradients(self, pxs, nxs):
    """Return sign-based margin gradients built from ``h + l - t`` residuals.

    Args:
        pxs: positive triples, in the form accepted by ``unzip_triples``.
        nxs: negative triples; compared position-by-position with ``pxs``.

    Returns:
        ``None`` when no pair violates the margin (nothing to update),
        otherwise ``{'E': (ge, eidx), 'R': (gr, ridx)}``.

    Raises:
        NotImplementedError: if ``self.l1`` is falsy.

    Side effects:
        Sets ``self.nviolations`` to the number of violating pairs.
    """
    # subject / predicate / object ids of the positive and negative triples
    pos_s, pos_p, pos_o = unzip_triples(pxs)
    neg_s, neg_p, neg_o = unzip_triples(nxs)

    pos_scores = self._scores(pos_s, pos_p, pos_o)
    neg_scores = self._scores(neg_s, neg_p, neg_o)

    # positions where the margin is violated
    violating = np.where(neg_scores + self.margin > pos_scores)[0]
    self.nviolations = len(violating)
    if not len(violating):
        return None

    # Restrict everything to the violators; lists so ``+`` concatenates.
    pos_s, neg_s = list(pos_s[violating]), list(neg_s[violating])
    pos_o, neg_o = list(pos_o[violating]), list(neg_o[violating])
    pos_p, neg_p = list(pos_p[violating]), list(neg_p[violating])

    # (h + l - t) for the positive and the negative triples
    pos_res = self.E[pos_s] + self.R[pos_p] - self.E[pos_o]
    neg_res = self.E[neg_s] + self.R[neg_p] - self.E[neg_o]

    if not self.l1:
        raise NotImplementedError()
    # L1 case: the gradient is the sign of the residual, negated for negatives
    pg = np.sign(pos_res)
    ng = np.negative(np.sign(neg_res))

    # entity gradients
    eidx, ent_sum, ent_n = grad_sum_matrix(pos_s + pos_o + neg_s + neg_o)
    ge = ent_sum.dot(np.vstack((pg, -pg, ng, -ng))) / ent_n

    # relation gradients
    ridx, rel_sum, rel_n = grad_sum_matrix(pos_p + neg_p)
    gr = rel_sum.dot(np.vstack((pg, ng))) / rel_n

    return {'E': (ge, eidx), 'R': (gr, ridx)}
def _pairwise_gradients(self, pxs, nxs):
    """Compute margin-ranking gradients for ``self.E`` and ``self.R``.

    Args:
        pxs: positive examples, in the form accepted by ``unzip_triples``.
        nxs: negative examples; compared position-by-position with ``pxs``.

    Returns:
        ``None`` when no pair violates the margin, otherwise
        ``{'E': (ge, eidx), 'R': (gr, ridx)}``.

    Side effects:
        Sets ``self.nviolations`` to the number of violating pairs.
    """
    pos_s, pos_p, pos_o = unzip_triples(pxs)
    neg_s, neg_p, neg_o = unzip_triples(nxs)

    pos_f = self.af.f(self._scores(pos_s, pos_p, pos_o))
    neg_f = self.af.f(self._scores(neg_s, neg_p, neg_o))

    # pairs that violate the margin
    violating = np.where(neg_f + self.margin > pos_f)[0]
    self.nviolations = len(violating)
    if not len(violating):
        return None

    # Lists (not arrays) so that ``+`` concatenates further down.
    pos_s, neg_s = list(pos_s[violating]), list(neg_s[violating])
    pos_o, neg_o = list(pos_o[violating]), list(neg_o[violating])
    pos_p, neg_p = list(pos_p[violating]), list(neg_p[violating])

    # column vectors, one factor per violating example
    pos_g = -self.af.g_given_f(pos_f[violating])[:, np.newaxis]
    neg_g = self.af.g_given_f(neg_f[violating])[:, np.newaxis]

    # relation gradients, with the rparam-weighted term added after averaging
    ridx, rel_sum, rel_n = grad_sum_matrix(pos_p + neg_p)
    rel_pos = pos_g * ccorr(self.E[pos_s], self.E[pos_o])
    rel_neg = neg_g * ccorr(self.E[neg_s], self.E[neg_o])
    gr = rel_sum.dot(np.vstack((rel_pos, rel_neg))) / rel_n
    gr += self.rparam * self.R[ridx]

    # entity gradients (no rparam term is added for E)
    eidx, ent_sum, ent_n = grad_sum_matrix(pos_s + neg_s + pos_o + neg_o)
    ent_terms = np.vstack((
        pos_g * ccorr(self.R[pos_p], self.E[pos_o]),
        neg_g * ccorr(self.R[neg_p], self.E[neg_o]),
        pos_g * cconv(self.E[pos_s], self.R[pos_p]),
        neg_g * cconv(self.E[neg_s], self.R[neg_p]),
    ))
    ge = ent_sum.dot(ent_terms) / ent_n

    return {'E': (ge, eidx), 'R': (gr, ridx)}
def _gradients(self, xys):
    """Compute pointwise loss gradients for ``self.E`` and ``self.R``.

    Args:
        xys: labelled examples, in the form accepted by
            ``unzip_triples(..., with_ys=True)``.

    Returns:
        ``{'E': (ge, eidx), 'R': (gr, ridx)}``; both gradients include an
        ``self.rparam``-weighted copy of the corresponding parameters.

    Side effects:
        Sets ``self.loss`` to ``sum(log(1 + exp(-y * score)))``.
    """
    subj, pred, obj, labels = unzip_triples(xys, with_ys=True)

    signed_scores = labels * self._scores(subj, pred, obj)
    self.loss = np.sum(np.logaddexp(0, -signed_scores))

    # one factor per example, as a column so it broadcasts over each row
    coeff = -(labels * af.Sigmoid.f(-signed_scores))[:, np.newaxis]

    # relation gradients
    ridx, rel_sum, rel_n = grad_sum_matrix(pred)
    rel_terms = coeff * ccorr(self.E[subj], self.E[obj])
    gr = rel_sum.dot(rel_terms) / rel_n
    gr += self.rparam * self.R[ridx]

    # entity gradients: subject rows first, then object rows
    eidx, ent_sum, ent_n = grad_sum_matrix(list(subj) + list(obj))
    subj_terms = coeff * ccorr(self.R[pred], self.E[obj])
    obj_terms = coeff * cconv(self.E[subj], self.R[pred])
    ge = ent_sum.dot(np.vstack((subj_terms, obj_terms))) / ent_n
    ge += self.rparam * self.E[eidx]

    return {'E': (ge, eidx), 'R': (gr, ridx)}
def _pairwise_gradients(self, pxs, nxs):
    """Return sign-based margin gradients for the violating pairs.

    Args:
        pxs: positive triples, in the form accepted by ``unzip_triples``.
        nxs: negative triples; compared position-by-position with ``pxs``.

    Returns:
        ``None`` when every pair satisfies the margin, otherwise
        ``{'E': (ge, eidx), 'R': (gr, ridx)}``.

    Raises:
        NotImplementedError: if ``self.l1`` is falsy.

    Side effects:
        Sets ``self.nviolations`` to the number of violating pairs.
    """
    pos_s, pos_p, pos_o = unzip_triples(pxs)
    neg_s, neg_p, neg_o = unzip_triples(nxs)

    pos_scores = self._scores(pos_s, pos_p, pos_o)
    neg_scores = self._scores(neg_s, neg_p, neg_o)

    violating = np.where(neg_scores + self.margin > pos_scores)[0]
    self.nviolations = len(violating)
    if not len(violating):
        # all examples in the batch satisfy the margin criterion
        return None

    # Lists (not arrays) so that ``+`` concatenates further down.
    pos_s, neg_s = list(pos_s[violating]), list(neg_s[violating])
    pos_p, neg_p = list(pos_p[violating]), list(neg_p[violating])
    pos_o, neg_o = list(pos_o[violating]), list(neg_o[violating])

    # residuals are formed as (t - l - h) and negated before taking the sign
    pos_res = self.E[pos_o] - self.R[pos_p] - self.E[pos_s]
    neg_res = self.E[neg_o] - self.R[neg_p] - self.E[neg_s]

    if not self.l1:
        raise NotImplementedError()
    pg = np.sign(np.negative(pos_res))
    ng = np.negative(np.sign(np.negative(neg_res)))

    # entity gradients
    eidx, ent_sum, ent_n = grad_sum_matrix(pos_s + pos_o + neg_s + neg_o)
    ge = ent_sum.dot(np.vstack((pg, -pg, ng, -ng))) / ent_n

    # relation gradients
    ridx, rel_sum, rel_n = grad_sum_matrix(pos_p + neg_p)
    gr = rel_sum.dot(np.vstack((pg, ng))) / rel_n

    return {'E': (ge, eidx), 'R': (gr, ridx)}
def _gradients(self, xys):
    """Compute pointwise loss gradients for ``self.E`` and ``self.R``.

    Args:
        xys: labelled examples, in the form accepted by
            ``unzip_triples(..., with_ys=True)``.

    Returns:
        ``{'E': (ge, eidx), 'R': (gr, ridx)}``; an ``self.rparam``-weighted
        copy of the parameters is added to each gradient.

    Side effects:
        Sets ``self.loss`` to ``sum(log(1 + exp(-y * score)))``.
    """
    s_ids, p_ids, o_ids, targets = unzip_triples(xys, with_ys=True)

    y_times_score = targets * self._scores(s_ids, p_ids, o_ids)
    self.loss = np.sum(np.logaddexp(0, -y_times_score))

    # per-example factor, kept as a column for row-wise broadcasting
    weight = -(targets * af.Sigmoid.f(-y_times_score))[:, np.newaxis]

    # relation part
    ridx, Sm_r, n_r = grad_sum_matrix(p_ids)
    gr = Sm_r.dot(weight * ccorr(self.E[s_ids], self.E[o_ids])) / n_r
    gr += self.rparam * self.R[ridx]

    # entity part: rows for subjects are stacked above rows for objects
    eidx, Sm_e, n_e = grad_sum_matrix(list(s_ids) + list(o_ids))
    stacked = np.vstack((
        weight * ccorr(self.R[p_ids], self.E[o_ids]),
        weight * cconv(self.E[s_ids], self.R[p_ids]),
    ))
    ge = Sm_e.dot(stacked) / n_e
    ge += self.rparam * self.E[eidx]

    return {'E': (ge, eidx), 'R': (gr, ridx)}
def gradients(self, xys):
    """Compute pointwise loss gradients for ``self.E`` and ``self.W``.

    Args:
        xys: iterable of ``(triple, label)`` pairs, where each triple
            unpacks as ``(s, o, p)``.

    Returns:
        ``{'E': (ge, eidx), 'W': (gw, pidx)}`` where ``gw`` holds one
        ``ncomp x ncomp`` matrix per distinct predicate in ``pidx``.

    Side effects:
        Sets ``self.loss`` to ``sum(log(1 + exp(-y * score)))``.
    """
    # separate out the subject, predicate, object and label arrays
    subj, pred, obj, labels = unzip_triples(xys, with_ys=True)

    # Memoized products: caching only pays off when xys repeats triples,
    # which happens with sampling.
    @memoized
    def subject_times_w(s, o, p):
        return dot(self.E[s], self.W[p])

    @memoized
    def w_times_object(s, o, p):
        return dot(self.W[p], self.E[o])

    EW = np.array([subject_times_w(*triple) for (triple, _) in xys])
    WE = np.array([w_times_object(*triple) for (triple, _) in xys])

    signed_scores = labels * np.sum(self.E[subj] * WE, axis=1)
    self.loss = np.sum(np.logaddexp(0, -signed_scores))
    coeff = -(labels * af.Sigmoid.f(-signed_scores))[:, np.newaxis]

    # one gradient matrix per distinct predicate
    pidx = np.unique(pred)
    gw = np.zeros((len(pidx), self.ncomp, self.ncomp))
    for slot, p in enumerate(pidx):
        rows = np.nonzero(pred == p)[0]
        if len(rows) != 1:
            # several examples share this predicate: average their products
            gw[slot] += dot(
                self.E[subj[rows]].T, coeff[rows] * self.E[obj[rows]]
            ) / len(rows)
        else:
            gw[slot] += coeff[rows] * np.outer(
                self.E[subj[rows]], self.E[obj[rows]]
            )
        gw[slot] += self.rparam * self.W[p]

    # entity gradients: subject rows first, then object rows
    eidx, ent_sum, ent_n = grad_sum_matrix(list(subj) + list(obj))
    ge = ent_sum.dot(np.vstack((coeff * WE, coeff * EW))) / ent_n
    ge += self.rparam * self.E[eidx]

    return {'E': (ge, eidx), 'W': (gw, pidx)}
def _gradients(self, xys):
    """Compute pointwise loss gradients for ``self.E`` and ``self.W``.

    Args:
        xys: iterable of ``(triple, label)`` pairs, where each triple
            unpacks as ``(s, o, p)``.

    Returns:
        ``{'E': (ge, eidx), 'W': (gw, pidx)}`` where ``gw`` holds one
        ``ncomp x ncomp`` matrix per distinct predicate in ``pidx``.

    Side effects:
        Sets ``self.loss`` to ``sum(log(1 + exp(-y * score)))``.
    """
    s_ids, p_ids, o_ids, targets = unzip_triples(xys, with_ys=True)

    # Cached products; the cache only helps when xys contains repeated
    # triples, which happens with sampling.
    @memoized
    def e_dot_w(s, o, p):
        return dot(self.E[s], self.W[p])

    @memoized
    def w_dot_e(s, o, p):
        return dot(self.W[p], self.E[o])

    EW = np.array([e_dot_w(*triple) for (triple, _) in xys])
    WE = np.array([w_dot_e(*triple) for (triple, _) in xys])

    y_times_score = targets * np.sum(self.E[s_ids] * WE, axis=1)
    self.loss = np.sum(np.logaddexp(0, -y_times_score))
    weight = -(targets * af.Sigmoid.f(-y_times_score))[:, np.newaxis]

    # gradient of every predicate matrix that occurs in the batch
    pidx = np.unique(p_ids)
    gw = np.zeros((len(pidx), self.ncomp, self.ncomp))
    for k, p in enumerate(pidx):
        members = np.nonzero(p_ids == p)[0]
        if len(members) != 1:
            # average over all examples that use this predicate
            gw[k] += dot(
                self.E[s_ids[members]].T, weight[members] * self.E[o_ids[members]]
            ) / len(members)
        else:
            gw[k] += weight[members] * np.outer(
                self.E[s_ids[members]], self.E[o_ids[members]]
            )
        gw[k] += self.rparam * self.W[p]

    # entity gradients: rows for subjects stacked above rows for objects
    eidx, Sm_e, n_e = grad_sum_matrix(list(s_ids) + list(o_ids))
    ge = Sm_e.dot(np.vstack((weight * WE, weight * EW))) / n_e
    ge += self.rparam * self.E[eidx]

    return {'E': (ge, eidx), 'W': (gw, pidx)}
def _pairwise_gradients(self, pxs, nxs):
    """Compute margin-ranking gradients for ``self.E`` and ``self.W``.

    Args:
        pxs: positive examples as ``(triple, label)`` pairs, each triple
            unpacking as ``(s, o, p)``.
        nxs: negative examples in the same form; compared
            position-by-position with ``pxs``.

    Returns:
        ``None`` when no pair violates the margin, otherwise
        ``{'E': (ge, eidx), 'W': (gw, pidx)}`` where ``gw`` holds one
        ``ncomp x ncomp`` matrix per distinct predicate in ``pidx``.

    Side effects:
        Sets ``self.nviolations`` to the number of violating pairs.
    """
    # indices of positive examples
    sp, pp, op = unzip_triples(pxs)
    # indices of negative examples
    sn, pn, on = unzip_triples(nxs)

    # Keep just the triples.  A plain list is used on purpose: stacking the
    # triples and the labels into one np.array yields a ragged array, which
    # current NumPy rejects with ValueError.
    ptriples = [triple for triple, _ in pxs]
    ntriples = [triple for triple, _ in nxs]

    # Memoized products; the cache only helps when triples repeat, which
    # happens with sampling.
    @memoized
    def _EW(s, o, p):
        return dot(self.E[s], self.W[p])

    @memoized
    def _WE(s, o, p):
        return dot(self.W[p], self.E[o])

    WEp = np.array([_WE(*x) for x in ptriples])
    WEn = np.array([_WE(*x) for x in ntriples])
    pscores = self.af.f(np.sum(self.E[sp] * WEp, axis=1))
    nscores = self.af.f(np.sum(self.E[sn] * WEn, axis=1))

    # find examples that violate margin
    ind = np.where(nscores + self.margin > pscores)[0]
    self.nviolations = len(ind)
    if len(ind) == 0:
        return None

    # per-example factors as columns, for row-wise broadcasting
    gpscores = -self.af.g_given_f(pscores)[:, np.newaxis]
    gnscores = self.af.g_given_f(nscores)[:, np.newaxis]

    # NOTE(review): the predicate gradients below run over every example in
    # the batch, while the entity gradients use only the margin violators —
    # confirm this asymmetry is intended.
    pidx = np.unique(list(pp) + list(pn))
    gw = np.zeros((len(pidx), self.ncomp, self.ncomp))
    for pid, p in enumerate(pidx):
        # np.where returns a tuple; take element 0 so that len() counts the
        # matching examples rather than the tuple (which is always length 1).
        ppidx = np.where(pp == p)[0]
        npidx = np.where(pn == p)[0]
        gw[pid] += dot(self.E[sp[ppidx]].T, gpscores[ppidx] * self.E[op[ppidx]])
        gw[pid] += dot(self.E[sn[npidx]].T, gnscores[npidx] * self.E[on[npidx]])
        gw[pid] += self.rparam * self.W[p]
        # p comes from the union of pp and pn, so the count is at least 1
        gw[pid] /= (len(ppidx) + len(npidx))

    # entity gradients (violating pairs only)
    sp, sn = list(sp[ind]), list(sn[ind])
    op, on = list(op[ind]), list(on[ind])
    gpscores, gnscores = gpscores[ind], gnscores[ind]
    EWp = np.array([_EW(*ptriples[i]) for i in ind])
    EWn = np.array([_EW(*ntriples[i]) for i in ind])
    eidx, Sm, n = grad_sum_matrix(sp + sn + op + on)
    stacked = np.vstack((
        gpscores * WEp[ind],
        gnscores * WEn[ind],
        gpscores * EWp,
        gnscores * EWn,
    ))
    ge = (Sm.dot(stacked) + self.rparam * self.E[eidx]) / n

    return {'E': (ge, eidx), 'W': (gw, pidx)}
def _pairwise_gradients(self, pxs, nxs):
    """Return margin gradients for violating pairs and count violations.

    Args:
        pxs: positive triples, in the form accepted by ``unzip_triples``.
        nxs: negative triples; compared position-by-position with ``pxs``.

    Returns:
        ``None`` when every pair satisfies the margin, otherwise
        ``{'E': (ge, eidx), 'R': (gr, ridx)}``.

    Side effects:
        Sets ``self.nviolations`` to the number of violating pairs and
        increments ``self.E.violations[u]`` once for each distinct entity
        ``u`` of every violating pair.
    """
    # indices of positive triples
    sp, pp, op = unzip_triples(pxs)
    # indices of negative triples
    sn, pn, on = unzip_triples(nxs)

    pscores = self._scores(sp, pp, op)
    nscores = self._scores(sn, pn, on)

    # A pair violates the criterion when margin > pscore - nscore, i.e. the
    # positive triple does not beat its negative by at least the margin.
    ind = np.where(nscores + self.margin > pscores)[0]

    # Count a violation for every distinct entity that takes part in a
    # violating positive/negative pair.
    for i in ind:
        for u in {sn[i], on[i], sp[i], op[i]}:
            self.E.violations[u] += 1

    self.nviolations = len(ind)
    # all examples in batch satisfy margin criterion
    if len(ind) == 0:
        return None

    # Lists (not arrays) so that ``+`` concatenates further down.
    sp = list(sp[ind])
    sn = list(sn[ind])
    pp = list(pp[ind])
    pn = list(pn[ind])
    op = list(op[ind])
    on = list(on[ind])

    # Residuals are formed as (t - l - h), i.e. (o - p - s).
    pg = self.E[op] - self.R[pp] - self.E[sp]
    ng = self.E[on] - self.R[pn] - self.E[sn]

    if self.l1:
        # With the L1 norm, each component's derivative is a sign.  The
        # residual was built as (t - l - h), so the positive side flips it
        # to get sign(h + l - t); the negative side keeps sign(t - l - h).
        pg = np.sign(-pg)
        ng = np.sign(ng)
    else:
        # L2 case: use the residual itself, (h + l - t) for the positives;
        # the negative residual is used unchanged.
        pg = -pg

    # Entity gradients.  sp + op + sn + on lists four ids per violating
    # pair.  Per the original notes: eidx holds the unique entities, Sm has
    # one row per entry of eidx, and n holds the row sums of Sm, so the
    # division normalises each summed gradient.
    eidx, Sm, n = grad_sum_matrix(sp + op + sn + on)
    # Shapes, from the original notes: the stacked block has
    # 4 * (#violating pairs) rows and one column per embedding component;
    # Sm.dot(...) yields one row per unique entity.
    ge = Sm.dot(np.vstack((pg, -pg, ng, -ng))) / n

    # relation gradients
    ridx, Sm, n = grad_sum_matrix(pp + pn)
    gr = Sm.dot(np.vstack((pg, ng))) / n

    return {'E': (ge, eidx), 'R': (gr, ridx)}
def _pairwise_gradients(self, pxs, nxs):
    """Compute margin-ranking gradients for ``self.E`` and ``self.W``.

    Args:
        pxs: positive examples as ``(triple, label)`` pairs, each triple
            unpacking as ``(s, o, p)``.
        nxs: negative examples in the same form; compared
            position-by-position with ``pxs``.

    Returns:
        ``None`` when no pair violates the margin, otherwise
        ``{'E': (ge, eidx), 'W': (gw, pidx)}`` where ``gw`` holds one
        ``ncomp x ncomp`` matrix per distinct predicate in ``pidx``.

    Side effects:
        Sets ``self.nviolations`` to the number of violating pairs.
    """
    # indices of positive examples
    sp, pp, op = unzip_triples(pxs)
    # indices of negative examples
    sn, pn, on = unzip_triples(nxs)

    # Only the triples are needed from here on.  They stay in a Python list
    # because packing triples and labels into a single np.array produces a
    # ragged array, which current NumPy refuses with ValueError.
    pos_triples = [triple for triple, _ in pxs]
    neg_triples = [triple for triple, _ in nxs]

    # Memoized products; caching pays off only for repeated triples, which
    # happens with sampling.
    @memoized
    def _EW(s, o, p):
        return dot(self.E[s], self.W[p])

    @memoized
    def _WE(s, o, p):
        return dot(self.W[p], self.E[o])

    WEp = np.array([_WE(*x) for x in pos_triples])
    WEn = np.array([_WE(*x) for x in neg_triples])
    pscores = self.af.f(np.sum(self.E[sp] * WEp, axis=1))
    nscores = self.af.f(np.sum(self.E[sn] * WEn, axis=1))

    # find examples that violate margin
    ind = np.where(nscores + self.margin > pscores)[0]
    self.nviolations = len(ind)
    if len(ind) == 0:
        return None

    # per-example factors as columns, for row-wise broadcasting
    gpscores = -self.af.g_given_f(pscores)[:, np.newaxis]
    gnscores = self.af.g_given_f(nscores)[:, np.newaxis]

    # NOTE(review): predicate gradients are accumulated over the whole
    # batch, whereas entity gradients below use only the violating pairs —
    # confirm this asymmetry is intended.
    pidx = np.unique(list(pp) + list(pn))
    gw = np.zeros((len(pidx), self.ncomp, self.ncomp))
    for pid, p in enumerate(pidx):
        # Index [0] unwraps the tuple returned by np.where; without it,
        # len() is always 1 and the normalisation below would always be 2.
        ppidx = np.where(pp == p)[0]
        npidx = np.where(pn == p)[0]
        gw[pid] += dot(self.E[sp[ppidx]].T, gpscores[ppidx] * self.E[op[ppidx]])
        gw[pid] += dot(self.E[sn[npidx]].T, gnscores[npidx] * self.E[on[npidx]])
        gw[pid] += self.rparam * self.W[p]
        # every p is drawn from pp or pn, so at least one example matches
        gw[pid] /= (len(ppidx) + len(npidx))

    # entity gradients (violating pairs only)
    sp, sn = list(sp[ind]), list(sn[ind])
    op, on = list(op[ind]), list(on[ind])
    gpscores, gnscores = gpscores[ind], gnscores[ind]
    EWp = np.array([_EW(*pos_triples[i]) for i in ind])
    EWn = np.array([_EW(*neg_triples[i]) for i in ind])
    eidx, Sm, n = grad_sum_matrix(sp + sn + op + on)
    stacked = np.vstack((
        gpscores * WEp[ind],
        gnscores * WEn[ind],
        gpscores * EWp,
        gnscores * EWn,
    ))
    ge = (Sm.dot(stacked) + self.rparam * self.E[eidx]) / n

    return {'E': (ge, eidx), 'W': (gw, pidx)}