def _upsampled_dft_batch(data, ups, upsample_factor=1, axis_offsets=None):
    im2pi = 1j * 2 * np.pi
    # Reference (per-image loop) implementation kept from the original source:
    # rec1 = np.zeros([data.shape[0], ups.astype(int), ups.astype(int)], dtype='complex128')
    # for k in range(data.shape[0]):
    #     tdata = data[k]
    #     dim_properties = list(zip(tdata.shape, axis_offsets[k]))
    #     for (n_items, ax_offset) in dim_properties[::-1]:
    #         kernel = ((np.arange(ups) - ax_offset)[:, None]
    #                   * np.fft.fftfreq(n_items, upsample_factor))
    #         kernel = np.exp(-im2pi * kernel)
    #         tdata = np.tensordot(kernel, tdata, axes=(1, -1))
    #     rec1[k] = tdata

    tdata = data.copy()
    # Upsampled DFT along the last (column) axis.
    kernel = (cp.tile(cp.arange(ups), (data.shape[0], 1))
              - axis_offsets[:, 1:2])[:, :, None] * cp.fft.fftfreq(
                  data.shape[2], upsample_factor)
    kernel = cp.exp(-im2pi * kernel)
    tdata = cp.einsum('ijk,ipk->ijp', kernel, tdata)
    # Upsampled DFT along the row axis; the row axis has length data.shape[1]
    # (the original used data.shape[2] here, which is only correct for square inputs).
    kernel = (cp.tile(cp.arange(ups), (data.shape[0], 1))
              - axis_offsets[:, 0:1])[:, :, None] * cp.fft.fftfreq(
                  data.shape[1], upsample_factor)
    kernel = cp.exp(-im2pi * kernel)
    rec = cp.einsum('ijk,ipk->ijp', kernel, tdata)
    return rec
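# Minimal usage sketch for _upsampled_dft_batch (illustrative only; the array
# sizes, upsample factor, and zero offsets are assumptions, not part of the
# original source). It evaluates an upsampled DFT patch around per-image
# offsets for a batch of 2-D FFTs, as used in sub-pixel image registration.
import numpy as np
import cupy as cp

batch, rows, cols = 4, 32, 32
data = cp.fft.fft2(cp.random.rand(batch, rows, cols))   # batch of 2-D spectra
ups = 16                                                # size of the upsampled region
offsets = cp.zeros((batch, 2))                          # per-image (row, col) offsets
rec = _upsampled_dft_batch(data, ups, upsample_factor=8, axis_offsets=offsets)
print(rec.shape)                                        # (4, 16, 16)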
def all_pair_dist_cuda(X1, X2, feat, metric='cosine'):
    if metric == 'cosine':
        norm1 = cp.einsum('ij, ij->i', X1, X1)
        norm1 = cp.sqrt(norm1, norm1).reshape(-1, 1)
        norm2 = cp.einsum('ij, ij->i', X2, X2)
        norm2 = cp.sqrt(norm2, norm2).reshape(-1, 1)
        return cp.dot(X2 / norm2, (X1 / norm1).T)
    else:
        n1 = len(X1)
        n2 = len(X2)
        nf = len(feat)
        feat = cp.array(feat)
        X1 = cp.array(X1.reshape(1, n1, -1))
        X2 = cp.array(X2.reshape(1, n2, -1))
        mat1 = cp.repeat(X1, n2, axis=0)
        mat2 = cp.repeat(X2.reshape(n2, 1, nf), n1, axis=1)
        isbow = cp.repeat(cp.repeat((feat < 2100).reshape(1, 1, nf), n1, axis=1),
                          n2, axis=0)
        count_mat = nf - cp.sum((mat1 == 0) & (mat2 == 0) & isbow, axis=2)
        zeros = (count_mat != 0)   # mask of pairs with a non-zero feature count
        dist_mat = cp.ones_like(count_mat)
        # original had np.cbs, which does not exist; the intended call is abs()
        dist_mat[zeros] = cp.sum(cp.abs(mat1 - mat2), axis=2)[zeros] / count_mat[zeros]
        return cp.asnumpy(dist_mat)
def _probability(self, x):
    """Unnormalized probability of one configuration P(x).

    Parameters
    ----------
    x : numpy array, shape (n_features,)
        One configuration

    Returns
    -------
    probability : float
    """
    w2 = np.reshape(self.w,
                    (self.n_features, self.d, self.D, self.D, self.mu))
    tmp = w2[0, x[0], 0, :, :]
    tmp2 = np.einsum('ij,kj->ik', tmp, np.conj(tmp)).reshape(self.D * self.D)
    for i in range(1, self.n_features - 1):
        tmp = np.einsum('imj,klj->ikml', w2[i, x[i], :, :, :],
                        np.conj(w2[i, x[i], :, :, :])).reshape(
                            (self.D * self.D, self.D * self.D))
        tmp2 = np.dot(tmp2, tmp)
    tmp = np.einsum(
        'ij,kj->ik',
        w2[self.n_features - 1, x[self.n_features - 1], :, 0, :],
        np.conj(w2[self.n_features - 1, x[self.n_features - 1], :, 0, :])
    ).reshape(self.D * self.D)
    probability = np.abs(np.inner(tmp2, tmp))
    return probability
def _batch_apply_resolution(deconvolved, Q):
    """Compute and apply resolution to deconvolved flux"""
    s = cp.einsum('...ij->...i', Q)
    resolution = Q / s[..., cp.newaxis]
    fluxivar = s * s
    flux = cp.einsum('...ij,...j->...i', resolution, deconvolved)
    return flux, fluxivar, resolution
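# Minimal usage sketch for _batch_apply_resolution (illustrative only; the
# shapes below are assumptions): row-normalize a batch of resolution kernels Q
# and apply them to a batch of deconvolved spectra.
import cupy as cp

nspec, nwave = 3, 50
Q = cp.random.rand(nspec, nwave, nwave)
deconvolved = cp.random.rand(nspec, nwave)
flux, fluxivar, resolution = _batch_apply_resolution(deconvolved, Q)
print(flux.shape, fluxivar.shape, resolution.shape)   # (3, 50) (3, 50) (3, 50, 50)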
def error_minimization(W, b, zeta, a, prev_layer, activation_func,
                       den_activation, y, w=None, d=None, y_pred=None):
    dW = {}
    dB = {}
    delta = {}
    try:
        batch_size = y.shape[1]
    except IndexError:
        batch_size = 1
        y = cp.reshape(y, (y.shape[0], batch_size))

    is_last_layer = (type(w) == type(d)) and (type(d) == type(None))

    if is_last_layer:
        delta['s'] = cp.subtract(a['s'], y)
        dB['s'] = (1 / batch_size) * cp.sum(delta['s'], axis=1)
        dB['s'] = cp.reshape(dB['s'], (dB['s'].shape[0], 1, 1))
        delta['s'] = cp.reshape(delta['s'],
                                (delta['s'].shape[0], 1, delta['s'].shape[1]))
        dW['s'] = (1 / batch_size) * cp.einsum('nik,kjn->nij', delta['s'], a['d'].T)
    else:
        w = cp.array(w)
        deltaW = cp.einsum('nik,kij->nj', w.T, d)
        deltaW = cp.reshape(deltaW, (deltaW.shape[0], 1, deltaW.shape[1]))
        a_der = activation(str(activation_func) + '_der', zeta['s'])
        delta['s'] = cp.multiply(deltaW, a_der)
        dB['s'] = (1 / batch_size) * cp.sum(delta['s'].squeeze(), axis=1)
        dB['s'] = cp.reshape(dB['s'], (dB['s'].shape[0], 1, 1))
        dW['s'] = (1 / batch_size) * cp.einsum('nik,kjn->nij', delta['s'], a['d'].T)

    deltaW = cp.einsum('nik,kij->knj', W['s'].T, delta['s'])
    a_der = activation(den_activation + '_der', zeta['d'])
    delta['d'] = cp.multiply(deltaW, a_der)
    dB['d'] = (1 / batch_size) * cp.sum(delta['d'], axis=2)
    dB['d'] = cp.reshape(dB['d'], (dB['d'].shape[0], dB['d'].shape[1], 1))
    dW['d'] = (1 / batch_size) * cp.dot(delta['d'], prev_layer.T)

    return [dW, dB, delta]
def forward(self, x):
    try:
        y = cupy.einsum(self.compute_str_forward, x, *self.F_matrix,
                        optimize='optimal')
    except:
        # self.path = cupy.einsum_path(self.compute_str_forward, x,
        #                              *self.F_matrix, optimize='optimal')[0]
        y = cupy.einsum(self.compute_str_forward, x, *self.F_matrix,
                        optimize='optimal')
    return y
def cupy_test(shape, times):
    sumT = 0
    for i in range(0, times):
        arr_gpu = cp.random.rand(shape, shape, dtype=cp.float64)
        start = time.perf_counter()
        cp.einsum('ij, jk', arr_gpu, arr_gpu)
        # note: einsum launches asynchronously; a cp.cuda.Stream.null.synchronize()
        # here would make the timing stricter
        end = time.perf_counter()
        if i == 0:
            continue   # discard the first (warm-up) iteration
        sumT += (end - start)
        # print((end - start) * 1000)
    times -= 1   # the warm-up iteration was not accumulated
    avgT = ((sumT / times) * 1000)
    print("cupy avg time %f" % avgT)
    return avgT
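# Minimal usage sketch for cupy_test (illustrative; the matrix size and
# iteration count are assumptions): time a square matrix product expressed
# as an einsum on the GPU.
import time
import cupy as cp

avg_ms = cupy_test(512, 10)   # average einsum time in milliseconds over 9 timed runs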
def DFT_matrix(Nd, om=None):
    dim = len(Nd)  # dimension
    if om is None:
        om = fake_Cartesian(Nd)
    N = numpy.prod(Nd)
    omN = cupy.zeros((N, dim), dtype=numpy.float64)
    grid = cupy.indices(Nd)
    for dimid in range(0, dim):
        omN[:, dimid] = (grid[dimid].ravel() - Nd[dimid] / 2)
    M = om.shape[0]
    A = cupy.einsum('m, n -> mn', om[:, 0], omN[:, 0], optimize='optimal')
    for d in range(1, dim):
        A += cupy.einsum('m, n -> mn', om[:, d], omN[:, d], optimize='optimal')
    return cupy.exp(-1.0j * A)
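# Minimal usage sketch for DFT_matrix (illustrative only; the frequency grid
# `om` is passed explicitly here so the sketch does not depend on the external
# fake_Cartesian helper): build the non-uniform DFT matrix for a small 1-D grid.
import numpy
import cupy

Nd = (8,)
om = cupy.linspace(-numpy.pi, numpy.pi, 16, endpoint=False).reshape(-1, 1)
A = DFT_matrix(Nd, om=om)
print(A.shape)   # (16, 8)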
def hebbian_rule(W, b, zeta, a, prev_layer, activation_func, den_activation,
                 y, w=None, d=None):
    dW = {}
    dB = {}
    delta = None
    try:
        batch_size = y.shape[1]
    except IndexError:
        batch_size = 1
        y = cp.reshape(y, (y.shape[0], batch_size))
    y = cp.argmax(y, axis=0).reshape((1, y.shape[1]))

    a['s'] = cp.reshape(a['s'], (a['s'].shape[0], 1, a['s'].shape[1]))

    out_in = cp.einsum('nij,nkj->nik', a['s'], a['d'])
    out_w = cp.einsum('nik,nij->nkj', a['s'], W['s'])
    out_w_out = cp.einsum('nik,nji->njk', out_w, a['s'])
    dW['s'] = (1 / batch_size) * (out_in - out_w_out)

    out_b = cp.einsum('nik,nij->nkj', a['s'], b['s'])
    out_b_out = cp.einsum('nik,nji->njk', out_b, a['s'])
    dB['s'] = (1 / batch_size) * cp.sum(y, axis=1)
    dB['s'] = cp.reshape(dB['s'], (dB['s'].shape[0], 1, 1))

    # prev_layer = cp.reshape(prev_layer, (prev_layer.shape[0], 1, prev_layer.shape[1]))
    out_in = cp.einsum('nij,kj->nik', a['d'], prev_layer)
    out_w = cp.einsum('nik,nij->nkj', a['d'], W['d'])
    out_w_out = cp.einsum('nik,nji->njk', out_w, a['d'])
    dW['d'] = (1 / batch_size) * (out_in - out_w_out)

    out_b = cp.einsum('nik,nij->nkj', a['d'], b['d'])
    out_b_out = cp.einsum('nik,nji->njk', out_b, a['d'])
    dB['d'] = (out_in - out_b_out)
    dB['d'] = (1 / batch_size) * cp.sum(dB['d'], axis=2)
    dB['d'] = cp.reshape(dB['d'], (dB['d'].shape[0], dB['d'].shape[1], 1))

    return [dW, dB, delta]
def gradient(self, x0, y_true):
    def func(a, t, params, A, function, bT, x, division):
        index = int(t * (division - 1))
        return cp.multiply(
            -1.,
            cp.add(
                cp.dot(a, params[1][index]),
                cp.dot(
                    cp.multiply(
                        bT,
                        cp.multiply(params[0][index],
                                    function(cp.dot(x[index], A.T)))), A)))

    n_data = len(x0)
    y_pred = self(x0)
    aT = cp.zeros_like(x0, dtype=cp.float32)
    bT = cp.divide(cp.subtract(y_pred, y_true), n_data)
    a = euler(func, aT, self.t[::-1],
              args=(self.params, self.A, self.d_function, bT, self.x,
                    self.division))
    g_alpha = cp.sum(
        cp.multiply(bT, self.function(cp.dot(self.x, self.A.T))), 1)
    g_beta = cp.einsum("ilj,ilk->ijk", a[::-1], self.x)
    g_gamma = cp.sum(a[::-1], 1)
    return (g_alpha, g_beta, g_gamma)
def row_norms(X, squared=False):
    """Row-wise (squared) Euclidean norm of X.

    Equivalent to np.sqrt((X * X).sum(axis=1)), but also supports sparse
    matrices.

    Performs no input validation.

    Parameters
    ----------
    X : array_like
        The input array
    squared : bool, optional (default = False)
        If True, return squared norms.

    Returns
    -------
    array_like
        The row-wise (squared) Euclidean norm of X.
    """
    if sparse.issparse(X):
        if isinstance(
                X, (sparse.csr_matrix, sparse.csc_matrix, sparse.coo_matrix)):
            X_copy = X.copy()
            X_copy.data = np.square(X_copy.data)
            norms = X_copy.sum(axis=1).squeeze()
        else:
            raise ValueError('Sparse matrix not compatible')
    else:
        norms = np.einsum('ij,ij->i', X, X)
    if not squared:
        np.sqrt(norms, norms)
    return norms
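# Minimal usage sketch for row_norms (illustrative; it assumes `np` is NumPy
# and `sparse` is scipy.sparse, matching the names used in the function body):
import numpy as np
from scipy import sparse

X = np.array([[3.0, 4.0], [0.0, 1.0]])
print(row_norms(X))                      # [5. 1.]
print(row_norms(X, squared=True))        # [25.  1.]
print(row_norms(sparse.csr_matrix(X)))   # row norms computed on the sparse copy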
def triangle_ke(self, coord):
    '''
    Calculates the local stiffness matrix ke for each triangle.

    takes:
        coord - coordinates of each triangle's nodes - shape (n_triangles, 3, 2)

    returns:
        ke_matrix - array of stiffness matrices for all elements - shape (n_triangles, 3, 3)
    '''
    s = cp.array(coord[:, [2, 0, 1]] - coord[:, [1, 2, 0]])  # shape (n_tri, 3, 2)
    ke_matrix = cp.empty((len(coord), 3, 3))
    area = cp.abs(0.5 * self.det2x2(s[:, 0], s[:, 1]))
    ke_matrix[:] = cp.einsum('ijk,kli->ijl', s, s.T) / (4. * area[:, None, None])
    # Alternative formulations kept from the original source:
    # A = s
    # B = s.T
    # i, j, k, l = A.shape
    # A = cp.reshape(A, (j, k * l * i))
    # B = cp.reshape(B, (1, k * l * i))
    # C = cp.sum(A * B, axis=2)
    # C = np.einsum('ijk,kli->ijl', s, s.T, casting='same_kind', dtype=cp.core.core.ndarray)
    # ke_matrix[:] = C / (4. * area[:, None, None])
    return ke_matrix
def local_cov_bet_class_NN(self, key, label, nb_class, batchsize, k):
    key_broadcast = cp.broadcast_to(key, (batchsize, batchsize, key.shape[1]))
    key_broadcast_transpose = cp.transpose(
        cp.broadcast_to(key, (batchsize, batchsize, key.shape[1])),
        axes=(1, 0, 2))
    sub_key_broadcast = key_broadcast - key_broadcast_transpose
    norm_sub_broadcast = cp.linalg.norm(sub_key_broadcast, axis=2)
    sorted_d = cp.sort(norm_sub_broadcast, axis=0)
    kth_d = sorted_d[k]
    kth_d = kth_d.reshape([batchsize, 1])
    sigma = cp.matmul(kth_d, cp.transpose(kth_d))

    batchsize_per_class = batchsize // nb_class
    index = cp.arange(key.shape[0])
    xx, yy = cp.meshgrid(index, index)
    sub = key[xx] - key[yy]
    norm_sub = cp.linalg.norm(sub, axis=2)
    a1 = cp.exp(-norm_sub * norm_sub / sigma)
    lindex = cp.arange(label.shape[0])
    lx, ly = cp.meshgrid(lindex, lindex)
    l = (label[lx] == label[ly])
    a1 = a1 * l * (1.0 / (batchsize * nb_class) - 1.0 / batchsize_per_class)
    l2 = (label[lx] != label[ly])
    a2 = l2 * (1.0 / batchsize)
    a = a1 + a2
    a = a.reshape([a.shape[0], a.shape[1], 1])
    a_sub = a * sub
    Sb = cp.einsum('ijk,ijl->kl', a_sub, sub, dtype='float32') * 0.5
    return Sb
def gpu():
    cp.cuda.Stream.null.synchronize()
    ga = cp.asarray(a)
    gb = cp.asarray(b)
    gpu_c = cp.einsum('ij,ij->i', ga, gb)
    gc = cp.asnumpy(gpu_c)
    return gc
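# gpu() reads the module-level arrays `a` and `b`. A minimal sketch of how it
# might be driven (the array shapes here are assumptions): it returns the
# row-wise dot products of a and b, computed on the GPU and copied back.
import numpy as np
import cupy as cp

a = np.random.rand(1000, 64)
b = np.random.rand(1000, 64)
c = gpu()
print(c.shape)   # (1000,)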
def findJac(self, ex_mat, perm0, ke, f, r_el):
    '''
    Calculates the Jacobian for all measurements.

    takes:
        ex_mat - array shape (n_source/sinks, 2) - excitation matrix with source
                 and sink for each measurement
        perm0 - array shape (n_triangles) - initial permittivity on each triangle
        ke - array shape (n_triangles, n_vertices, n_vertices) - stiffness matrix
             on each element
        f - array shape (n_nodes) - voltage on each node of mesh
        r_el - inverse of global stiffness matrix on electrodes

    returns:
        jac - array shape (n_measurements, n_electrodes, n_triangles) - Jacobian
              for all measurements
    '''
    # initialise array for Jacobian
    jac = cp.zeros((ex_mat.shape[0], self.ne, self.n_tri), dtype=perm0.dtype)
    # calculating the Jacobian
    jac[:] = cp.einsum('ijk, jkp, ljp->lij', r_el[:, self.tri], ke,
                       f[:, self.tri], optimize='optimal')
    # jac = cp.zeros((ex_mat.shape[0], self.ne, self.n_tri), dtype=perm0.dtype)
    # jac_all_el_pts = jac_all_el_pts.reshape((ex_mat.shape[0], self.ne, self.n_per_el, self.n_tri))
    # jac[:] = (1. / self.n_per_el) * np.sum(jac_all_el_pts, axis=2)
    return jac
def row_norms(X, squared=False):
    """Row-wise (squared) Euclidean norm of X.

    Equivalent to np.sqrt((X * X).sum(axis=1)), but also supports sparse
    matrices and does not create an X.shape-sized temporary.

    Performs no input validation.

    Parameters
    ----------
    X : array_like
        The input array
    squared : bool, optional (default = False)
        If True, return squared norms.

    Returns
    -------
    array_like
        The row-wise (squared) Euclidean norm of X.
    """
    if sparse.issparse(X):
        if not isinstance(X, sparse.csr_matrix):
            X = sparse.csr_matrix(X)
        # norms = csr_row_norms(X)
    else:
        norms = np.einsum('ij,ij->i', X, X)
    if not squared:
        np.sqrt(norms, norms)
    return norms
def _upsampled_dft(self, data, ups, upsample_factor=1, axis_offsets=None):
    im2pi = 1j * 2 * np.pi
    tdata = data.copy()
    kernel = (cp.tile(cp.arange(ups), (data.shape[0], 1))
              - axis_offsets[:, 1:2])[:, :, None] * cp.fft.fftfreq(
                  data.shape[2], upsample_factor)
    kernel = cp.exp(-im2pi * kernel)
    tdata = cp.einsum('ijk,ipk->ijp', kernel, tdata)
    kernel = (cp.tile(cp.arange(ups), (data.shape[0], 1))
              - axis_offsets[:, 0:1])[:, :, None] * cp.fft.fftfreq(
                  data.shape[1], upsample_factor)
    kernel = cp.exp(-im2pi * kernel)
    rec = cp.einsum('ijk,ipk->ijp', kernel, tdata)
    return rec
def grad_h(self, w, i=None, j=None):
    '''Gradient of h(x) at w.

    Depending on the shape of w and the parameters i and j, this function
    behaves differently:

    1. If w is a vector of shape (dim,)
       1.1 If i is None and j is None, returns the full gradient.
       1.2 If i is not None and j is None, returns the gradient at the i-th agent.
       1.3 If i is None and j is not None, returns the gradient of the j-th
           data sample(s) over all training data.
       1.4 If i is not None and j is not None, returns the gradient of the
           j-th data sample at the i-th agent.
       Note i, j can be integers, lists or vectors.

    2. If w is a matrix of shape (dim, n_agent)
       2.1 If j is None, returns the gradient of each parameter at the
           corresponding agent.
       2.2 If j is not None, returns the gradient of each parameter of the
           j-th sample at the corresponding agent.
       Note j can be lists of lists or vectors.
    '''
    if w.ndim == 1:
        if type(j) is int:
            j = [j]

        if i is None and j is None:  # Return the full gradient
            return self.X_train.T.dot(
                logit_1d(self.X_train, w) - self.Y_train
            ) / self.m_total + w * self.LAMBDA
        elif i is not None and j is None:  # Return the local gradient at agent i
            return self.X[i].T.dot(
                logit_1d(self.X[i], w) - self.Y[i]
            ) / self.m + w * self.LAMBDA
        elif i is None and j is not None:  # Return the gradient of samples j over all training data
            return self.X_train[j].T.dot(
                logit_1d(self.X_train[j], w) - self.Y_train[j]
            ) / len(j) + w * self.LAMBDA
        else:  # Return the gradient of sample j at machine i
            return (logit_1d(self.X[i][j], w) - self.Y[i][j]).dot(
                self.X[i][j]) / len(j) + w * self.LAMBDA

    elif w.ndim == 2:
        if i is None and j is None:  # Return the distributed gradient
            tmp = logit_2d(self.X, w) - self.Y
            return xp.einsum('ikj,ik->ji', self.X, tmp) / self.m + w * self.LAMBDA
        elif i is None and j is not None:  # Return the stochastic gradient
            res = []
            for i in range(self.n_agent):
                if type(j[i]) is int:
                    samples = [j[i]]
                else:
                    samples = j[i]
                res.append(self.X[i][samples].T.dot(
                    logit_1d(self.X[i][samples], w[:, i]) - self.Y[i][samples]
                ) / len(samples) + w[:, i] * self.LAMBDA)
            return xp.array(res).T
        else:
            log.fatal('For distributed gradients j must be None')
    else:
        log.fatal('Parameter dimension should only be 1 or 2')
def _derivativenorm(self):
    """Compute the derivative of the norm.

    Returns
    -------
    derivative : numpy array, shape (m_parameters,)
    """
    w2 = np.reshape(self.w,
                    (self.n_features, self.d, self.D, self.D, self.mu))
    derivative = np.zeros(
        (self.n_features, self.d, self.D, self.D, self.mu),
        dtype=np.complex128)

    tmp = np.zeros((self.n_features, self.D * self.D), dtype=np.complex128)
    tmp2 = np.zeros((self.n_features, self.D * self.D), dtype=np.complex128)
    tmp[0, :] = np.einsum('ijk,ilk->jl', w2[0, :, 0, :, :],
                          np.conj(w2[0, :, 0, :, :])).reshape(self.D * self.D)
    for i in range(1, self.n_features - 1):
        newtmp = np.einsum('pimj,pklj->ikml', w2[i, :, :, :, :],
                           np.conj(w2[i, :, :, :, :])).reshape(
                               (self.D * self.D, self.D * self.D))
        tmp[i, :] = np.dot(tmp[i - 1, :], newtmp)
    newtmp = np.einsum('ijk,ilk->jl', w2[self.n_features - 1, :, :, 0, :],
                       np.conj(w2[self.n_features - 1, :, :, 0, :])).reshape(
                           self.D * self.D)
    mpscontracted = np.inner(tmp[self.n_features - 2, :], newtmp)
    tmp[self.n_features - 1, :] = mpscontracted
    tmp2[self.n_features - 1, :] = newtmp
    for i in range(self.n_features - 2, -1, -1):
        newtmp = np.einsum('pimj,pklj->ikml', w2[i, :, :, :, :],
                           np.conj(w2[i, :, :, :, :])).reshape(
                               (self.D * self.D, self.D * self.D))
        tmp2[i, :] = np.dot(newtmp, tmp2[i + 1, :])
    newtmp = np.einsum('ijk,ilk->jl', w2[0, :, 0, :, :],
                       np.conj(w2[0, :, 0, :, :])).reshape(self.D * self.D)
    tmp2[0, :] = np.inner(newtmp, tmp2[1, :])

    for j in range(self.d):
        derivative[0, j, 0, :, :] = 2 * np.einsum(
            'ij,il->lj', w2[0, j, 0, :, :],
            tmp2[1, :].reshape(self.D, self.D))
        derivative[self.n_features - 1, j, :, 0, :] = 2 * np.einsum(
            'ij,il->lj', w2[self.n_features - 1, j, :, 0, :],
            tmp[self.n_features - 2, :].reshape(self.D, self.D))
    for i in range(1, self.n_features - 1):
        temp1 = tmp[i - 1, :].reshape(self.D, self.D)
        temp2 = tmp2[i + 1, :].reshape(self.D, self.D)
        for j in range(self.d):
            derivative[i, j, :, :, :] = 2 * np.einsum(
                'ikm,ij,kl->jlm', w2[i, j, :, :, :], temp1, temp2)
    return derivative.reshape(self.m_parameters)
def get_backprop_updates(self, forward_pass, target, ortho_weighting=0.0):
    # Updates will be stored and returned
    weight_updates = []
    bias_updates = []

    # The update will be done layer-wise with a backpropagating signal
    nb_layers = len(self.layers)
    error = forward_pass[-1] - target

    for layer_index in range(nb_layers)[::-1]:
        if self.layers[layer_index].linear:
            layer_derivatives = xp.ones((error.shape))
        else:
            layer_derivatives = self.layers[
                layer_index].transfer_derivative_func(
                    self.layers[layer_index].transfer_inverse_func(
                        forward_pass[layer_index + 1]))

        # Calculate updates for this layer
        weight_update = xp.mean(xp.einsum(
            'nj, ni -> nij', layer_derivatives * error,
            forward_pass[layer_index][:, :self.net_structure[layer_index + 1]]),
            axis=0)
        bias_update = xp.mean(layer_derivatives * error, axis=0)

        # Calculating a weight update based upon a soft orthogonal regularizer
        if ortho_weighting != 0.0:
            weight_update += self.ortho_gradients(ortho_weighting, layer_index)

        # Collect updates
        weight_updates.append(-weight_update)
        bias_updates.append(-bias_update)

        # Propagate the error to the next layer
        error *= layer_derivatives
        error = xp.einsum('nj, ij -> ni', error,
                          self.layers[layer_index].weight_matrix)
        error = xp.hstack([
            error,
            xp.zeros((error.shape[0],
                      self.net_structure[layer_index] - error.shape[1]))
        ])

    return weight_updates[::-1], bias_updates[::-1]
def sample(self, n_samples=1, random_state=None):
    """
    Generate random samples from the model.

    Currently, this is implemented only for gaussian and tophat kernels,
    and the Euclidean metric.

    Parameters
    ----------
    n_samples : int, default=1
        Number of samples to generate.
    random_state : int, cupy RandomState instance or None, default=None

    Returns
    -------
    X : cupy array of shape (n_samples, n_features)
        List of samples.
    """
    if not hasattr(self, "X_"):
        raise NotFittedError()

    supported_kernels = ["gaussian", "tophat"]
    if (self.kernel not in supported_kernels
            or self.metric != "euclidean"):
        raise NotImplementedError(
            "Only {} kernels, and the euclidean"
            " metric are supported.".format(supported_kernels))

    if isinstance(random_state, cp.random.RandomState):
        rng = random_state
    else:
        rng = cp.random.RandomState(random_state)

    u = rng.uniform(0, 1, size=n_samples)
    if self.sample_weight_ is None:
        i = (u * self.X_.shape[0]).astype(np.int64)
    else:
        cumsum_weight = cp.cumsum(self.sample_weight_)
        sum_weight = cumsum_weight[-1]
        i = cp.searchsorted(cumsum_weight, u * sum_weight)

    if self.kernel == "gaussian":
        return cp.atleast_2d(rng.normal(self.X_[i], self.bandwidth))

    elif self.kernel == "tophat":
        # we first draw points from a d-dimensional normal distribution,
        # then use an incomplete gamma function to map them to a uniform
        # d-dimensional tophat distribution.
        has_scipy(raise_if_unavailable=True)
        dim = self.X_.shape[1]
        X = rng.normal(size=(n_samples, dim))
        s_sq = cp.einsum("ij,ij->i", X, X).get()

        # do this on the CPU because we don't have
        # a gammainc function readily available
        correction = cp.array(
            gammainc(0.5 * dim, 0.5 * s_sq) ** (1.0 / dim)
            * self.bandwidth / np.sqrt(s_sq))
        return self.X_[i] + X * correction[:, np.newaxis]
def __call__(self, batch_emb, batch_words):
    nn_idx = self.get_neighobors(batch_emb, batch_words)
    weights = self.induce_weights(batch_emb, nn_idx)
    nn_spec_emb = self._spec_emb[nn_idx]
    ret = cupy.einsum('ijk,ij->ik', nn_spec_emb, weights)
    return ret
def adjoint(self, y):
    # print(self.compute_str_adj)
    x = cupy.einsum(
        self.compute_str_adj, y,
        *[self.F_matrix[dimid].conj() for dimid in range(0, self.ndims)],
        optimize='optimal')
    x /= self.scale
    return x.reshape(self.Nd)
def get_gait_updates(self, forward_pass, targets, ortho_weighting=0.0,
                     gamma=0.001):
    # Updates will be stored and returned
    weight_updates = []
    bias_updates = []

    nb_layers = len(self.layers)
    inverse = targets
    mult_factor = 1.0

    for layer_index in range(nb_layers)[::-1]:
        error = mult_factor * (forward_pass[layer_index + 1] - inverse)
        if self.layers[layer_index].linear:
            layer_derivatives = xp.ones((error.shape))
        else:
            layer_derivatives = self.layers[
                layer_index].transfer_derivative_func(
                    self.layers[layer_index].transfer_inverse_func(
                        forward_pass[layer_index + 1]))

        # Calculate updates for this layer
        weight_update = xp.mean(xp.einsum(
            'nj, ni -> nij', layer_derivatives * error,
            forward_pass[layer_index][:, :self.net_structure[layer_index + 1]]),
            axis=0)
        bias_update = xp.mean(layer_derivatives * error, axis=0)

        # Calculating a weight update based upon a soft orthogonal regularizer
        if ortho_weighting != 0.0:
            weight_update += self.ortho_gradients(ortho_weighting, layer_index)

        # Collect updates
        weight_updates.append(-weight_update)
        bias_updates.append(-bias_update)

        grad_adjusted_inc_factor = gamma * layer_derivatives * layer_derivatives
        inverse = self.layers[layer_index].inverse(
            (1.0 - grad_adjusted_inc_factor) * forward_pass[layer_index + 1]
            + grad_adjusted_inc_factor * inverse)
        mult_factor = mult_factor / gamma

        # Adding the auxiliary neurons on
        inverse = xp.hstack([
            inverse,
            forward_pass[layer_index][:, self.net_structure[layer_index + 1]:]
        ])

    return weight_updates[::-1], bias_updates[::-1]
def induce_weights(self, batch_emb, nn_idx):
    nn_gen_emb = self._gen_emb[nn_idx]
    diff = batch_emb[:, None] - nn_gen_emb
    C = cupy.einsum('ijk,ilk->ijl', diff, diff)
    C_inv = inv_gpu(C)
    w = cupy.sum(C_inv, axis=1) / cupy.sum(C_inv, axis=(1, 2))[:, None]
    return w
def grad_h(self, w, i=None, j=None, split='train'):
    '''Gradient of h(x) at w.

    Depending on the shape of w and the parameters i and j, this function
    behaves differently:

    1. If w is a vector of shape (dim,)
       1.1 If i is None and j is None, returns the full gradient.
       1.2 If i is not None and j is None, returns the gradient at the i-th agent.
       1.3 If i is None and j is not None, returns the gradient of the j-th
           data sample(s) over all training data.
       1.4 If i is not None and j is not None, returns the gradient of the
           j-th data sample at the i-th agent.
       Note i, j can be integers, lists or vectors.

    2. If w is a matrix of shape (dim, n_agent)
       2.1 If j is None, returns the gradient of each parameter at the
           corresponding agent.
       2.2 If j is not None, returns the gradient of each parameter of the
           j-th sample at the corresponding agent.
       Note j can be lists of lists or vectors.
    '''
    if w.ndim == 1:
        if type(j) is int:
            j = [j]

        if i is None and j is None:  # Return the full gradient
            return self.H.dot(w) - self.X_T_Y
        elif i is not None and j is None:  # Return the local gradient
            return self.H_list[i].dot(w) - self.X_T_Y_list[i]
        elif i is None and j is not None:  # Return the stochastic gradient
            return (self.X_train[j].dot(w) - self.Y_train[j]).dot(
                self.X_train[j]) / len(j)
        else:  # Return the stochastic gradient at agent i
            return (self.X[i][j].dot(w) - self.Y[i][j]).dot(
                self.X[i][j]) / len(j)

    elif w.ndim == 2:
        if i is None and j is None:  # Return the distributed gradient
            return xp.einsum('ijk,ki->ji', self.H_list, w) - self.X_T_Y_list.T
        elif i is None and j is not None:  # Return the stochastic gradient
            res = []
            for i in range(self.n_agent):
                if type(j[i]) is int:
                    samples = [j[i]]
                else:
                    samples = j[i]
                res.append((self.X[i][samples].dot(w[:, i])
                            - self.Y[i][samples]).dot(self.X[i][samples])
                           / len(samples))
            return xp.array(res).T
        else:
            log.fatal('For distributed gradients j must be None')
    else:
        log.fatal('Parameter dimension should only be 1 or 2')
def get_gait_updates(self, forward_pass, targets, ortho_weighting=0.0,
                     gamma=0.001):
    # Updates will be stored and returned
    weight_updates = []
    bias_updates = []

    # We must compute errors layer-wise.
    # In our formulation, each layer's error is the (scaled) difference
    # between its forward activity and its layer-wise inverse target.
    nb_layers = len(self.layers)

    # Calculating the inverse target
    inverse = targets
    mult_factor = 1.0

    # Running backwards through layers
    for layer_index in range(nb_layers)[::-1]:
        error = mult_factor * (forward_pass[layer_index + 1] - inverse)
        if self.layers[layer_index].linear:
            layer_derivatives = xp.ones((error.shape))
        else:
            layer_derivatives = self.layers[
                layer_index].transfer_derivative_func(
                    self.layers[layer_index].transfer_inverse_func(
                        forward_pass[layer_index + 1]))
        error *= layer_derivatives

        # Calculate updates for this layer
        weight_update = xp.mean(xp.einsum('nj, ni -> nij', error,
                                          forward_pass[layer_index]),
                                axis=0)
        bias_update = xp.mean(error, axis=0)

        # Calculating a weight update based upon a soft orthogonal regularizer
        if ortho_weighting != 0.0:
            weight_update += self.ortho_gradients(ortho_weighting, layer_index)

        # Collect updates
        weight_updates.append(-weight_update)
        bias_updates.append(-bias_update)

        # Adjust and calculate the next layer's target
        grad_adjusted_inc_factor = gamma * layer_derivatives * layer_derivatives
        inverse = self.layers[layer_index].inverse(
            (1.0 - grad_adjusted_inc_factor) * forward_pass[layer_index + 1]
            + grad_adjusted_inc_factor * inverse)
        mult_factor = mult_factor / gamma

    return weight_updates[::-1], bias_updates[::-1]
def _computenorm(self):
    """Compute the norm of the probability distribution.

    Returns
    -------
    norm : float
    """
    w2 = np.reshape(self.w,
                    (self.n_features, self.d, self.D, self.D, self.mu))
    tmp2 = np.einsum('ijk,ilk->jl', w2[0, :, 0, :, :],
                     np.conj(w2[0, :, 0, :, :])).reshape(self.D * self.D)
    for i in range(1, self.n_features - 1):
        tmp = np.einsum('pimj,pklj->ikml', w2[i, :, :, :, :],
                        np.conj(w2[i, :, :, :, :])).reshape(
                            (self.D * self.D, self.D * self.D))
        tmp2 = np.dot(tmp2, tmp)
    tmp = np.einsum('ijk,ilk->jl', w2[self.n_features - 1, :, :, 0, :],
                    np.conj(w2[self.n_features - 1, :, :, 0, :])).reshape(
                        self.D * self.D)
    norm = np.abs(np.inner(tmp2, tmp))
    return norm
def _einsum(self, expr, operands):
    result = cp.einsum(expr.indices_string,
                       *(operand.tsr for operand in operands),
                       optimize='greedy')
    if isinstance(result, cp.ndarray) and result.ndim != 0:
        newshape = expr.outputs[0].newshape(result.shape)
        result = result.reshape(*newshape)
        return self.tensor(result)
    elif isinstance(result, cp.ndarray):
        return result.item()
    else:
        return result
def local_cov_in_class(self, key, label, nb_class, batchsize):
    index = cp.arange(key.shape[0])
    xx, yy = cp.meshgrid(index, index)
    sub = key[xx] - key[yy]
    norm_sub = cp.linalg.norm(sub, axis=2)
    a = cp.exp(-norm_sub * norm_sub / 100)
    lindex = cp.arange(label.shape[0])
    lx, ly = cp.meshgrid(lindex, lindex)
    l = (label[lx] == label[ly])
    a = a * l
    Sw = cp.einsum('ij,ijk,ijl->kl', a, sub, sub, dtype='float32') * 0.5 * (1.0 / batchsize)
    return Sw