def test_scalar_mult():
    """Check scalar multiplication on the GPU, both into a target and in place."""
    rows, cols = 256, 128
    factor = np.pi

    host_in = np.array(np.random.randn(rows, cols) * 10,
                       dtype=np.float32, order='F')
    host_scratch = np.array(np.empty((rows, cols)),
                            dtype=np.float32, order='F')
    expected = host_in * factor

    src = cm.CUDAMatrix(host_in)
    dst = cm.CUDAMatrix(host_scratch)

    # The targeted call runs first: the call without a target overwrites src.
    src.mult(factor, target=dst)
    src.mult(factor)

    src.copy_to_host()
    dst.copy_to_host()

    for result in (src, dst):
        worst = np.max(np.abs(expected - result.numpy_array))
        assert worst < 10**-4, "Error in CUDAMatrix.mult exceeded threshold"
def test_assign_scalar():
    """Check that assigning a scalar overwrites every entry of a GPU matrix."""
    rows, cols = 256, 128
    host = np.array(np.random.rand(rows, cols) * 10,
                    dtype=np.float32, order='F')

    gpu = cm.CUDAMatrix(host)
    gpu.assign(np.pi)
    gpu.copy_to_host()

    worst = np.max(np.abs(gpu.numpy_array - np.pi))
    assert worst < 10**-4, "Error in CUDAMatrix.assign_scalar exceeded threshold"
def test_pow_matrix():
    """Check element-wise matrix-to-matrix power against NumPy's ``**``."""
    rows, cols = 256, 128
    base = np.array(np.random.rand(rows, cols) * 20,
                    dtype=np.float32, order='F')
    scratch = np.array(np.random.rand(rows, cols),
                       dtype=np.float32, order='F')
    exponent = np.array(np.random.randn(rows, cols),
                        dtype=np.float32, order='F')
    expected = base**exponent

    gpu_base = cm.CUDAMatrix(base)
    gpu_out = cm.CUDAMatrix(scratch)
    gpu_exp = cm.CUDAMatrix(exponent)

    # Targeted call first; the call without a target overwrites gpu_base.
    cm.pow(gpu_base, gpu_exp, target=gpu_out)
    cm.pow(gpu_base, gpu_exp)

    gpu_base.copy_to_host()
    gpu_out.copy_to_host()

    for result in (gpu_base, gpu_out):
        worst = np.max(np.abs(expected - result.numpy_array))
        assert worst < 10**-2, "Error in cudamat.pow exceeded threshold"
def test_slice():
    """Check that slice(32, 64) matches the NumPy column slice ``a[:, 32:64]``."""
    rows, cols = 256, 128
    first, last = 32, 64

    host = np.array(np.random.rand(rows, cols) * 10,
                    dtype=np.float32, order='F')
    expected = np.array(host[:, first:last], order='F')

    gpu_full = cm.CUDAMatrix(host)
    gpu_part = gpu_full.slice(first, last)
    gpu_part.copy_to_host()

    worst = np.max(np.abs(expected - gpu_part.numpy_array))
    assert worst < 10**-4, "Error in CUDAMatrix.slice exceeded threshold"
def test_tanh():
    """Check apply_tanh against np.tanh, into a target and in place."""
    rows, cols = 256, 128
    host_in = np.array(np.random.randn(rows, cols) * 10,
                       dtype=np.float32, order='F')
    host_scratch = np.array(np.random.randn(rows, cols) * 10,
                            dtype=np.float32, order='F')
    expected = np.tanh(host_in)

    src = cm.CUDAMatrix(host_in)
    dst = cm.CUDAMatrix(host_scratch)

    # Targeted call first; the call without a target overwrites src.
    src.apply_tanh(target=dst)
    src.apply_tanh()

    src.copy_to_host()
    dst.copy_to_host()

    for result in (src, dst):
        worst = np.max(np.abs(expected - result.numpy_array))
        assert worst < 10**-4, "Error in CUDAMatrix.apply_tanh exceeded threshold"
def initTemporary(self):
    """Allocate the GPU scratch matrices and masks used per mini-batch.

    Zero-filled buffers are created for hidden and visible quantities
    (``numHid`` or ``numVis`` rows by ``mbsz`` columns), plus a ones column,
    a single-row buffer and two complementary row masks: ``sMask`` is 1 on
    the first ``smsz`` rows and 0 elsewhere, ``gaussMask`` is its complement.
    """
    def gpu_zeros(n_rows, n_cols):
        # Upload a zero matrix of the requested shape to the device.
        return cm.CUDAMatrix(reformat(num.zeros((n_rows, n_cols))))

    hid, vis, batch = self.numHid, self.numVis, self.mbsz

    self.hActs = gpu_zeros(hid, batch)
    self.hActProbs = gpu_zeros(hid, batch)
    self.negVis = gpu_zeros(vis, batch)
    self.tempVisMB = gpu_zeros(vis, batch)
    self.dynamicHidBias = gpu_zeros(hid, batch)
    self.dynamicVisBias = gpu_zeros(vis, batch)

    # Build both masks on the host first, then upload them.
    first_rows_mask = num.zeros((vis, batch))
    first_rows_mask[:self.smsz, :] = 1
    remaining_rows_mask = 1 - first_rows_mask

    self.onesCol = cm.CUDAMatrix(reformat(num.ones((vis, 1))))
    self.sMask = cm.CUDAMatrix(reformat(first_rows_mask))
    self.gaussMask = cm.CUDAMatrix(reformat(remaining_rows_mask))
    self.tempRow = gpu_zeros(1, batch)
def test_set_row_slice():
    """Check that set_row_slice overwrites rows [start, end) like NumPy."""
    rows, cols = 256, 128
    first, last = 11, 54

    host = np.array(np.random.rand(rows, cols) * 10,
                    dtype=np.float32, order='F')
    patch = np.array(np.random.rand(last - first, cols) * 10,
                     dtype=np.float32, order='F')

    expected = host.copy()
    expected[first:last, :] = patch

    gpu_host = cm.CUDAMatrix(host)
    gpu_patch = cm.CUDAMatrix(patch)
    gpu_host.set_row_slice(first, last, gpu_patch)
    gpu_host.copy_to_host()

    worst = np.max(np.abs(expected - gpu_host.numpy_array))
    assert worst < 10**-4, "Error in CUDAMatrix.set_row_slice exceeded threshold"
def test_correlate():
    """Check cm.correlate against a brute-force zero-padded 2-D correlation.

    The reference is computed on the host: for every output position the
    kernel is centred on it and products are summed over the taps that fall
    inside the input, so out-of-range taps contribute nothing.
    """
    m = 64
    n = 32
    km = 17
    kn = 11
    a = np.array(np.random.randn(m, n)*10, dtype=np.float32, order='F')
    k = np.array(np.random.randn(km, kn)*10, dtype=np.float32, order='F')

    # Floor division keeps the half-widths integral: they are used both as
    # range() bounds and as array indices, which reject floats.
    half_m = km // 2
    half_n = kn // 2

    res = np.zeros_like(a)
    for i in range(m):
        for j in range(n):
            for h in range(-half_m, half_m + 1):
                for w in range(-half_n, half_n + 1):
                    if 0 <= i + h < m and 0 <= j + w < n:
                        res[i][j] += a[i + h][j + w] * k[half_m + h][half_n + w]

    a_d = cm.CUDAMatrix(a)
    k_d = cm.CUDAMatrix(k)
    res_d = cm.correlate(a_d, k_d)

    assert np.abs(res-res_d.asarray()).max() < 1e-2, "Error in cudamat.correlate"
def normolize(feat):
    """Return ``feat`` as a (4096, 1) column divided by its Euclidean norm.

    The norm is computed on the GPU as ``sqrt(feat.T . feat)``; the division
    itself happens on the host.
    """
    column = np.reshape(feat, (4096, 1))
    gpu_column = cm.CUDAMatrix(column)

    # 1x1 matrix holding the squared norm, square-rooted on the device.
    norm = cm.sqrt(cm.dot(gpu_column.T, gpu_column)).asarray()

    return column / norm[0]
def search(indices, feat, feature_map, ID_map):
    """Rank the IDs of the selected categories by their GPU dot-product score.

    Only ``indices[0][0:2]`` is used as the list of categories. For each
    category whose ``feature_map`` / ``ID_map`` entry is truthy, the features
    are stacked vertically and the IDs horizontally. The stacked features are
    multiplied by ``feat`` on the GPU, the scores are sorted in descending
    order along axis 0, and ``ID_table`` indexed by the first 100 sorted
    positions is returned.

    NOTE(review): this source was whitespace-collapsed; the ``else`` below is
    paired with ``if feature_table is None`` by symmetry -- confirm against
    the original layout.
    NOTE(review): if a category has IDs but no features while
    ``feature_table`` is still None, the next category overwrites
    ``ID_table`` instead of extending it -- confirm this cannot happen.
    NOTE(review): if no category has features, ``feature_table`` stays None
    when passed to ``cm.CUDAMatrix`` -- presumably callers guarantee a match.
    """
    feature_table = None
    ID_table = None
    # Keep only the first two entries of the first row of indices.
    indices = indices[0][0:2]
    for category in indices:
        if feature_table is None:
            # Nothing collected yet: start the tables from copies.
            if (feature_map[category]):
                feature_table = np.copy(feature_map[category])
            if (ID_map[category]):
                ID_table = np.copy(ID_map[category])
        else:
            # Extend the tables: features row-wise, IDs along one flat axis.
            if (feature_map[category]):
                feature_table = np.vstack(
                    (feature_table, feature_map[category]))
            if (ID_map[category]):
                ID_table = np.hstack((ID_table, ID_map[category]))
    a = cm.CUDAMatrix(feat)
    c = cm.CUDAMatrix(feature_table)
    # Score every stacked feature row against the query on the device.
    d = cm.dot(c, a)
    e = d.asarray()
    # Negating the scores makes argsort return descending order.
    ind = np.argsort(-e, axis=0)
    ind = ind[0:100]
    ID_result = ID_table[ind]
    ''' for index in ind: if ID_result is None: ID_result = np.copy(ID_map[index]) else: ID_result = np.hstack((ID_result, ID_map[index])) '''
    return ID_result
def search(indices,feat,feature_map,ID_map):
    """Rank the IDs of the selected categories by per-category GPU scores.

    The categories are ``indices[0][0:2]`` with the constant 27 appended.
    For each category whose ``feature_map`` entry is truthy, that entry is
    multiplied by ``feat`` on the GPU and the scores are stacked vertically;
    truthy ``ID_map`` entries are stacked horizontally. Scores are sorted in
    descending order along axis 0 and ``ID_table`` indexed by the first 200
    sorted positions is returned.

    NOTE(review): ``feature_map[category]`` is passed straight to ``cm.dot``,
    so it is presumably already a device matrix -- confirm.
    NOTE(review): this source was whitespace-collapsed; the ``else`` below is
    paired with ``if feature_table is None`` by symmetry -- confirm against
    the original layout.
    NOTE(review): the meaning of category 27 is not visible here.
    """
    feature_table=None
    ID_table=None
    # Keep only the first two entries of the first row of indices.
    indices = indices[0][0:2]
    # Category 27 is always searched in addition to the two selected ones.
    a=27
    indices = np.hstack((indices,a))
    # `a` is reused from here on for the query uploaded to the device.
    a = cm.CUDAMatrix(feat)
    for category in indices:
        if feature_table is None:
            # Nothing collected yet: start the tables from copies.
            if (feature_map[category]):
                d = cm.dot(feature_map[category], a)
                f = d.asarray()
                feature_table = np.copy(f)
            if (ID_map[category]):
                ID_table = np.copy(ID_map[category])
        else:
            # Extend the tables: scores row-wise, IDs along one flat axis.
            if (feature_map[category]):
                d = cm.dot(feature_map[category], a)
                f = d.asarray()
                feature_table = np.vstack((feature_table, f))
            if (ID_map[category]):
                ID_table = np.hstack((ID_table, ID_map[category]))
    ''' #print feat c = cm.CUDAMatrix(feature_table) d = cm.dot(c, a) q = cm.dot(feature_zl_cm, a) t = d.asarray() r = q.asarray() e = np.vstack((t,r)) '''
    # feature_table holds scores here; negating gives descending order.
    ind = np.argsort(-feature_table,axis=0)
    ind = ind [0:200]
    ID_result=ID_table[ind]
    ''' for index in ind: if ID_result is None: ID_result = np.copy(ID_map[index]) else: ID_result = np.hstack((ID_result, ID_map[index])) '''
    return ID_result
def forward(self, sample1, sample2):
    """Return the entropy-regularised transport cost between two samples.

    The cost matrix is the squared ``self.ccdist`` distance in double
    precision, optionally divided by its maximum. A Sinkhorn transport plan
    with uniform marginals is computed on the GPU without gradient tracking;
    the returned value is the sum of the plan multiplied element-wise by the
    cost matrix.
    """
    cost = self.ccdist(sample1, sample2).double() ** 2

    if self.normalize_cost:
        # The scale is a constant: only the maximum is taken without grad.
        with torch.no_grad():
            peak = cost.data.max()
        cost = cost / peak

    with torch.no_grad():
        cost_gpu = cudamat.CUDAMatrix(cost.data.numpy())
        weights1 = numpy.ones((sample1.size(0),)) / sample1.size(0)
        weights2 = numpy.ones((sample2.size(0),)) / sample2.size(0)
        regularisation = float(self.entropy_reg.numpy()[0])
        plan, _log = ot.gpu.sinkhorn(weights1, weights2, cost_gpu,
                                     regularisation, log=True)
        transport = torch.DoubleTensor(plan)

    return torch.sum(transport * cost)
def setVariables(self):
    """Upload G, W and X to the GPU and allocate all derived work buffers.

    Besides the three inputs this builds ``X^T X`` and splits it into
    ``XTXpos`` (its entries where they are greater than zero, else 0) and
    ``XTXneg = XTXpos - X^T X``, then allocates uninitialised buffers for the
    intermediate products.
    """
    _n, m, r = self.n, self.m, self.rank

    for name in ("G", "W", "X"):
        setattr(self, name + "_gpu", cm.CUDAMatrix(getattr(self, name)))

    gram = cm.dot(self.X_gpu.T, self.X_gpu)
    self.XTX_gpu = gram

    # Indicator of entries > 0, multiplied by the values themselves.
    positive = cm.empty((m, m))
    self.XTXpos_gpu = positive
    gram.greater_than(0, target=positive)
    positive.mult(gram)

    negative = cm.empty((m, m))
    self.XTXneg_gpu = negative
    positive.subtract(gram, target=negative)

    # Uninitialised scratch buffers, allocated in the original order.
    scratch_shapes = (
        ("XTXnegW_gpu", (m, r)),
        ("XTXposW_gpu", (m, r)),
        ("GWT_gpu", (m, m)),
        ("update1_gpu", (m, r)),
        ("update2_gpu", (m, r)),
        ("GTG_gpu", (r, r)),
        ("XTXnegG_gpu", (m, r)),
        ("XTXposG_gpu", (m, r)),
    )
    for attr, shape in scratch_shapes:
        setattr(self, attr, cm.empty(shape))
def test_transpose():
    """Check transpose against NumPy, into a target and as a new matrix."""
    rows, cols = 6, 128
    host = np.array(np.random.rand(rows, cols) * 10,
                    dtype=np.float32, order='F')
    host_scratch = np.array(np.random.rand(cols, rows),
                            dtype=np.float32, order='F')
    expected = host.copy().T

    src = cm.CUDAMatrix(host)
    into_target = cm.CUDAMatrix(host_scratch)
    src.transpose(target=into_target)
    fresh = src.transpose()

    into_target.copy_to_host()
    fresh.copy_to_host()

    for result in (into_target, fresh):
        worst = np.max(np.abs(expected - result.numpy_array))
        assert worst < 10**-4, "Error in CUDAMatrix.transpose exceeded threshold"
def test_sign():
    """Check sign against np.sign, including positive and negative zero."""
    rows, cols = 256, 128
    host = np.array(np.random.randn(rows, cols) * 10,
                    dtype=np.float32, order='F')
    # Force both zero variants into the input.
    host[0, 0] = 0.
    host[0, 1] = -0.
    host_scratch = np.array(np.random.randn(rows, cols) * 10,
                            dtype=np.float32, order='F')
    expected = np.sign(host)

    src = cm.CUDAMatrix(host)
    into_target = cm.CUDAMatrix(host_scratch)
    fresh = src.sign()
    src.sign(target=into_target)

    fresh.copy_to_host()
    into_target.copy_to_host()

    for result in (fresh, into_target):
        worst = np.max(np.abs(expected - result.numpy_array))
        assert worst < 10**-4, "Error in CUDAMatrix.sign exceeded threshold"
def test_manhattan_norm():
    """Check manhattan_norm against the double-precision sum of |a| on the host."""
    rows, cols = 256, 128
    host = np.array(np.random.rand(rows, cols) * 10,
                    dtype=np.float32, order='F')
    gpu = cm.CUDAMatrix(host)

    n1 = np.sum(np.abs(host), dtype=np.double)
    n2 = gpu.manhattan_norm()

    gap = np.abs(n1 - n2)
    assert gap < 2e-2, "Error in CUDAMatrix.manhattan_norm exceeded threshold (%f != %f)" % (
        n1, n2)
def test_mult():
    """Check element-wise matrix multiplication, into a target and in place."""
    rows, cols = 256, 128
    left = np.array(np.random.randn(rows, cols) * 10,
                    dtype=np.float32, order='F')
    right = np.array(np.random.randn(rows, cols) * 10,
                     dtype=np.float32, order='F')
    host_scratch = np.array(np.empty((rows, cols)),
                            dtype=np.float32, order='F')
    expected = left * right

    gpu_left = cm.CUDAMatrix(left)
    gpu_right = cm.CUDAMatrix(right)
    gpu_out = cm.CUDAMatrix(host_scratch)

    # Targeted call first; the call without a target overwrites gpu_left.
    gpu_left.mult(gpu_right, target=gpu_out)
    gpu_left.mult(gpu_right)

    gpu_out.copy_to_host()
    gpu_left.copy_to_host()

    for result in (gpu_out, gpu_left):
        worst = np.max(np.abs(expected - result.numpy_array))
        assert worst < 10**-4, "Error in CUDAMatrix.multiply exceeded threshold"
def test_choose_max():
    """
    Tests the choose max command

    The expected result is a matrix that is 1.0 at the row of each column's
    maximum and 0.0 everywhere else.
    """
    rows = 10
    cols = 4
    a = np.array(np.random.rand(rows, cols) * 10, dtype=np.float32, order='F')
    gpu = cm.CUDAMatrix(a)

    # np.float was only an alias of the builtin float (float64) and no longer
    # exists in current NumPy, so the dtype is spelled out.
    expected = np.zeros_like(a, dtype=np.float64)
    expected[np.argmax(a, axis=0), np.arange(a.shape[1])] = 1.0

    chosen = gpu.choose_max(axis=0)

    # Reduce to the worst element first: asserting on the element-wise
    # comparison array itself raises "truth value ... is ambiguous".
    worst = np.max(np.abs(expected - chosen.asarray()))
    assert worst < 10**-2, "Error in CUDAMatrix.choose_max exceeded threshold"
def test_lgamma():
    """Check cm.lgamma against scipy's gammaln, into a target and in place."""
    from scipy.special import gammaln

    rows, cols = 256, 128
    host_in = np.array(np.random.rand(rows, cols) * 10,
                       dtype=np.float32, order='F')
    host_scratch = np.array(np.random.rand(rows, cols) * 10,
                            dtype=np.float32, order='F')
    expected = gammaln(host_in)

    src = cm.CUDAMatrix(host_in)
    dst = cm.CUDAMatrix(host_scratch)

    # Targeted call first; the call without a target overwrites src.
    cm.lgamma(src, target=dst)
    cm.lgamma(src)

    src.copy_to_host()
    dst.copy_to_host()

    in_place_err = np.max(np.abs(expected - src.numpy_array))
    assert in_place_err < 10**-2, "Error in cudamat.lgamma exceeded threshold " + str(
        np.max(np.abs(expected - src.numpy_array)))

    target_err = np.max(np.abs(expected - dst.numpy_array))
    assert target_err < 10**-2, "Error in cudamat.lgamma exceeded threshold"
def test_soft_threshold():
    """Check apply_soft_threshold against sign(a) * max(0, |a| - alpha)."""
    rows, cols = 256, 128
    host_in = np.array(np.random.randn(rows, cols) * 10,
                       dtype=np.float32, order='F')
    host_scratch = np.array(np.random.randn(rows, cols) * 10,
                            dtype=np.float32, order='F')
    threshold = 0.5
    expected = np.sign(host_in) * np.maximum(0, np.abs(host_in) - threshold)

    src = cm.CUDAMatrix(host_in)
    dst = cm.CUDAMatrix(host_scratch)

    # Targeted call first; the call without a target overwrites src.
    src.apply_soft_threshold(threshold, target=dst)
    src.apply_soft_threshold(threshold)

    src.copy_to_host()
    dst.copy_to_host()

    for result in (src, dst):
        worst = np.max(np.abs(expected - result.numpy_array))
        assert worst < 10**-4, "Error in CUDAMatrix.apply_soft_threshold exceeded threshold"
def updateSignOfWeights(self):
    """
    We need the sign of the weights for L1 regularization. Since we work
    on the GPU it is convenient to just allocate storage for these things
    once and periodically update the sign variables when the weights they
    depend on have changed and we need to know the signs.

    The sign buffers are allocated on the first call (when any of them is
    still None); every call then refreshes them from ``visToHid``, ``A``
    and ``B``.
    """
    # Identity test: `== None` would dispatch to any __eq__ the stored
    # objects define, while `is None` always checks for the missing buffer.
    if self.signVisToHid is None or self.signA is None or self.signB is None:
        self.signVisToHid = cm.CUDAMatrix(
            reformat(num.zeros((self.numVis, self.numHid))))
        self.signA = [
            cm.CUDAMatrix(reformat(num.zeros((self.numVis, self.numVis))))
            for i in range(self.numPrev)
        ]
        self.signB = [
            cm.CUDAMatrix(reformat(num.zeros((self.numVis, self.numHid))))
            for i in range(self.numPrev)
        ]

    self.visToHid.sign(target=self.signVisToHid)
    for i in range(self.numPrev):
        self.A[i].sign(target=self.signA[i])
        self.B[i].sign(target=self.signB[i])
def test_T_field():
    """Check that the ``.T`` view works as an operand of dot, add_dot and add_sums."""
    rows, cols = 256, 128
    tolerance = 10**-2

    host_wide = np.array(np.random.rand(cols, rows) * 10,
                         dtype=np.float32, order='F')
    host_col = np.array(np.random.rand(rows, 1) * 10,
                        dtype=np.float32, order='F')
    gpu_wide = cm.CUDAMatrix(host_wide)
    gpu_col = cm.CUDAMatrix(host_col)

    # dot with both operands transposed
    product = cm.dot(gpu_col.T, gpu_wide.T)
    expected = np.dot(host_col.T, host_wide.T)
    product.copy_to_host()
    assert np.max(np.abs(product.numpy_array - expected)) < tolerance, \
        "Error in CUDAMatrix.dot with TransposedCUDAMatrix exceeded threshold"

    # add_dot with both operands transposed
    host_row = np.array(np.random.rand(1, cols) * 10,
                        dtype=np.float32, order='F')
    gpu_row = cm.CUDAMatrix(host_row)
    gpu_row.add_dot(gpu_col.T, gpu_wide.T)
    expected = host_row + np.dot(host_col.T, host_wide.T)
    gpu_row.copy_to_host()
    assert np.max(np.abs(gpu_row.numpy_array - expected)) < tolerance, \
        "Error in CUDAMatrix.add_dot TransposedCUDAMatrix exceeded threshold"

    # add_sums over a transposed operand
    gpu_col.add_sums(gpu_wide.T, axis=1)
    expected = host_col + np.atleast_2d(host_wide.sum(0)).T
    gpu_col.copy_to_host()
    assert np.max(np.abs(gpu_col.numpy_array - expected)) < tolerance, \
        "Error in CUDAMatrix.add_sums TransposedCUDAMatrix exceeded threshold"
def test_reciprocal():
    """Check reciprocal against ``1 / a``, into a target and in place."""
    rows, cols = 256, 128
    # The 10**-3 offset keeps every input strictly positive.
    host_in = np.array(np.random.rand(rows, cols) * 10 + 10**-3,
                       dtype=np.float32, order='F')
    host_scratch = np.array(np.random.rand(rows, cols) * 10,
                            dtype=np.float32, order='F')
    expected = 1. / host_in

    src = cm.CUDAMatrix(host_in)
    dst = cm.CUDAMatrix(host_scratch)

    # Targeted call first; the call without a target overwrites src.
    src.reciprocal(target=dst)
    src.reciprocal()

    src.copy_to_host()
    dst.copy_to_host()

    for result in (src, dst):
        worst = np.max(np.abs(expected - result.numpy_array))
        assert worst < 10**-4, "Error in CUDAMatrix.reciprocal exceeded threshold"
def test_add_row_vec():
    """Check add_row_vec against NumPy broadcasting of a (1, n) row."""
    rows, cols = 256, 128
    host = np.array(np.random.rand(rows, cols) * 10,
                    dtype=np.float32, order='F')
    row = np.array(np.random.rand(1, cols) * 10,
                   dtype=np.float32, order='F')
    host_scratch = np.array(np.random.rand(rows, cols) * 10,
                            dtype=np.float32, order='F')
    expected = host + row

    gpu_host = cm.CUDAMatrix(host)
    gpu_row = cm.CUDAMatrix(row)
    gpu_out = cm.CUDAMatrix(host_scratch)

    # Targeted call first; the call without a target overwrites gpu_host.
    gpu_host.add_row_vec(gpu_row, target=gpu_out)
    gpu_host.add_row_vec(gpu_row)

    gpu_host.copy_to_host()
    gpu_out.copy_to_host()

    for result in (gpu_host, gpu_out):
        worst = np.max(np.abs(expected - result.numpy_array))
        assert worst < 10**-4, "Error in CUDAMatrix.add_row_vec exceeded threshold"
def test_div_by_col():
    """Check div_by_col against NumPy broadcasting of an (m, 1) column."""
    rows, cols = 256, 128
    host = np.array(np.random.rand(rows, cols) * 10,
                    dtype=np.float32, order='F')
    # The 0.1 offset keeps every divisor away from zero.
    divisor = np.array(np.random.rand(rows, 1) * 10,
                       dtype=np.float32, order='F') + 0.1
    host_scratch = np.array(np.random.rand(rows, cols) * 10,
                            dtype=np.float32, order='F')
    expected = host / divisor

    gpu_host = cm.CUDAMatrix(host)
    gpu_divisor = cm.CUDAMatrix(divisor)
    gpu_out = cm.CUDAMatrix(host_scratch)

    # Targeted call first; the call without a target overwrites gpu_host.
    gpu_host.div_by_col(gpu_divisor, target=gpu_out)
    gpu_host.div_by_col(gpu_divisor)

    gpu_host.copy_to_host()
    gpu_out.copy_to_host()

    for result in (gpu_host, gpu_out):
        worst = np.max(np.abs(expected - result.numpy_array))
        assert worst < 10**-4, "Error in CUDAMatrix.div_by_col exceeded threshold"
def test_dot_vect():
    """Check cm.dot for matrix/vector products in four operand layouts."""
    m, k, n = 128, 256, 1
    tolerance = 10**-2
    message = "Error in CUDAMatrix.dot exceeded threshold"

    a = np.array(np.random.randn(m, k)*10, dtype=np.float32, order='F')
    b = np.array(np.random.randn(k, n)*10, dtype=np.float32, order='F')
    gpu_a = cm.CUDAMatrix(a)
    gpu_b = cm.CUDAMatrix(b)

    # matrix . column vector
    expected = np.dot(a, b)
    actual = cm.dot(gpu_a, gpu_b)
    assert np.max(np.abs(expected - actual.asarray())) < tolerance, message

    # transposed matrix . first m entries of the vector
    expected = np.dot(a.T, b[:m])
    actual = cm.dot(gpu_a.T, gpu_b.slice(0, m))
    assert np.max(np.abs(expected - actual.asarray())) < tolerance, message

    # row vector . transposed matrix
    expected = np.dot(b.T, a.T)
    actual = cm.dot(gpu_b.T, gpu_a.T)
    assert np.max(np.abs(expected - actual.asarray())) < tolerance, message

    # first m entries reshaped to a row . matrix
    expected = np.dot(b[:m].T, a)
    actual = cm.dot(gpu_b.slice(0, m).reshape((1, m)), gpu_a)
    assert np.max(np.abs(expected - actual.asarray())) < tolerance, message
def trainLowMemory(self, data, index, numEpochs, reportMB=False):
    """Train for ``numEpochs`` epochs, uploading one mini-batch at a time.

    This is a generator. After every epoch it yields the reconstruction
    error accumulated over that epoch; when ``reportMB`` is true it also
    yields ``(mb, num_mini_batches)`` after each mini-batch.

    ``data`` must be float32 and is indexed by column: ``index`` lists the
    columns usable as current frames, and the ``numPrev`` columns before
    each of them are used as its past frames. Cases that do not fill a whole
    mini-batch of ``self.mbsz`` are dropped. The permutation of cases is
    drawn once and reused for every epoch.
    """
    assert (data.dtype == num.dtype('float32'))
    numcases = len(index)

    # Floor division: the count is passed to range(), which rejects the
    # float that true division produces.
    num_mini_batches = numcases // self.mbsz
    indexPerm = num.random.permutation(range(numcases))

    noise = cm.CUDAMatrix(reformat(num.zeros((self.numVis, self.mbsz))))
    noiseThresh = cm.CUDAMatrix(
        reformat(num.zeros((self.numVis, self.mbsz))))
    noiseThresh.assign_scalar(1.0 - self.pastNoise)

    for ep in range(numEpochs):
        recErr = 0
        for mb in range(num_mini_batches):
            mbIndex = index[indexPerm[mb * self.mbsz:(mb + 1) * self.mbsz]]

            curInputsMB_CPU = data[:, mbIndex]
            # Past frame i is the column i + 1 positions before each case.
            curPastMB_CPU = [
                data[:, mbIndex - i - 1] for i in range(self.numPrev)
            ]
            curInputsMB = cm.CUDAMatrix(reformat(curInputsMB_CPU))
            curPastMB = [cm.CUDAMatrix(reformat(p)) for p in curPastMB_CPU]

            for i in range(self.numPrev):
                if self.pastNoise > 0 and not self.samplePast:
                    # Keep each past entry where a fresh random draw falls
                    # below 1 - pastNoise; zero it otherwise.
                    noise.fill_with_rand()
                    noise.less_than(noiseThresh, target=noise)
                    curPastMB[i].mult(noise)
                if self.samplePast:
                    # Replace each past entry by the 0/1 outcome of
                    # comparing a fresh random draw against it.
                    noise.fill_with_rand()
                    noise.less_than(curPastMB[i], target=curPastMB[i])

            self.step(curInputsMB, curPastMB)
            recErr += self.curRecErr()
            if reportMB:
                yield (mb, num_mini_batches)
        yield recErr
def LoadParams(self, proto):
    """Load every parameter listed in ``proto`` onto the GPU.

    Each parameter is stored in ``self.params`` under its name as a single
    column. A parameter with no dimensions gets the default size
    ``proto.numlabels * proto.dimensions``. A parameter that carries stored
    values (``param.mat``) is decoded from them; otherwise it is freshly
    initialised. For the parameter named ``'bias'`` a zeroed gradient buffer
    of the same shape is also allocated as ``self.grad_bias``.
    """
    self.hyperparams = proto.hyperparams

    for param in proto.param:
        if not param.dimensions:
            param.dimensions.extend([proto.numlabels * proto.dimensions])

        # Stored values win over fresh initialisation.
        if param.mat:
            values = util.ParameterAsNumpy(param)
        else:
            values = self.InitializeParameter(param)
        mat = values.reshape(-1, 1)

        self.params[param.name] = cm.CUDAMatrix(mat)
        if param.name == 'bias':
            self.grad_bias = cm.empty(mat.shape)
            self.grad_bias.assign(0)

    self.sample_input = self.hyperparams.sample_input
def rbmVtoH(m, X):
    """Convey data from the visible layer to the hidden layer.

    Args:
        m: model exposing ``weight``, ``biasH`` and ``type``.
        X: host data; ``X.shape[0]`` is the number of cases.

    Returns:
        Host array with one row per case and one column per hidden unit.
        For type ``"BB"`` it holds ``sigmoid(X . weight + biasH)``, for
        ``"BG"`` it holds ``X . weight + biasH``. For ``"GB"`` and any other
        type nothing is computed and the array is all zeros.

    cuBLAS is initialised on entry. Every device buffer is freed and cudamat
    is shut down on exit, also when a GPU call raises.
    """
    cm.cublas_init()
    on_device = []  # buffers to release, in allocation order
    try:
        # copy data to GPU
        data = cm.CUDAMatrix(cm.reformat(X))
        on_device.append(data)
        weight = cm.CUDAMatrix(cm.reformat(m.weight))
        on_device.append(weight)
        biasH = cm.CUDAMatrix(cm.reformat(m.biasH))
        on_device.append(biasH)

        nCase = X.shape[0]
        nHid = biasH.asarray().size
        hidActP = cm.CUDAMatrix(np.zeros((nCase, nHid)))
        on_device.append(hidActP)

        if m.type in ("BB", "BG"):
            cm.dot(data, weight, target=hidActP)
            hidActP.add_row_vec(biasH)
            if m.type == "BB":
                hidActP.apply_sigmoid()
        # "GB" is not implemented: the zero matrix is returned as is.

        # Copy back to the host before the finally block frees the buffers.
        return hidActP.asarray()
    finally:
        # free device memory
        for matrix in on_device:
            matrix.free_device_memory()
        cm.shutdown()
def Train(self, ref):
    """Run one recursive-least-squares update of the readout weights.

    ``ref`` is the vector of all desired outputs at the current time
    instant. For each of the ``n_out`` outputs this updates the running
    statistics ``sigma_e``, ``sigma_q`` and ``sigma_v``, derives the
    forgetting factor from them (capped at 0.9999), updates the matrix
    ``P`` and corrects row ``saida`` of ``Wro`` by the weighted error.

    NOTE(review): assumes ``self.a`` is a (neu, 1) device column and
    ``self.P`` is (neu, neu), so that ``a.T . P . a`` is 1x1 -- confirm.
    """
    # Compute the error vector.
    e = self.trainingError(ref)
    max_lambda = 0.9999

    # Smoothing factors, hoisted out of the loop. The "gain" is
    # deliberately computed as 1 - keep, as in the original expressions,
    # so the floating-point results are unchanged.
    keep_a = 1.0 - 1.0/(self.K_a * self.neu)
    gain_a = 1.0 - keep_a
    keep_b = 1.0 - 1.0/(self.K_b * self.neu)
    gain_b = 1.0 - keep_b

    # A regularization step (P -= mu * P.P with mu = 1e-8) used to run at
    # the top of each iteration; it is disabled.
    for saida in range(self.n_out):
        err = e[saida]

        self.sigma_e = keep_a * self.sigma_e + gain_a * err**2

        # sigma_q becomes a 1x1 host array after the first update, so it
        # is reduced to a plain float before being used as a scalar.
        prev_sigma_q = float(np.asarray(self.sigma_q).item())
        aPa = cm.dot(cm.dot(self.a.T, self.P), self.a)
        self.sigma_q = (cm.pow(aPa, 2)
                        .mult(gain_a)
                        .add(keep_a * prev_sigma_q)).asarray()

        self.sigma_v = keep_b * self.sigma_v + gain_b * err**2

        self.forget_aux = (np.sqrt(self.sigma_q) * np.sqrt(self.sigma_v)) / (
            1e-8 + abs(np.sqrt(self.sigma_e) - np.sqrt(self.sigma_v)))
        # np.minimum works element-wise; np.min over the list
        # [1x1 array, float] would need a ragged array, which current
        # NumPy refuses to build.
        self.forget = np.atleast_2d(np.minimum(self.forget_aux, max_lambda))
        # ndarray.item() replaces np.asscalar, which no longer exists in
        # current NumPy.
        forget = self.forget.item()

        # Column view of the weights of the current output.
        Theta = self.Wro.asarray()[saida, :]
        Theta = Theta.reshape([self.neu, 1])
        Theta = cm.CUDAMatrix(Theta)

        # RLS equations: the P update, step by step.
        # P <- (P - P.a.a^T.P / (a^T.P.a + forget)) / forget
        A = cm.dot(self.P, self.a)
        B = cm.dot(A, self.a.T)
        C = cm.dot(B, self.P)
        D = cm.dot(cm.dot(self.a.T, self.P), self.a).add(forget)
        self.P.subtract(C.divide(D.asarray().item()))
        self.P.divide(forget)

        # Final update: correct the weights by the error-weighted P.a.
        Theta.subtract(
            cm.dot(self.P, self.a).mult(np.asarray(err).item()))
        Theta = Theta.reshape([1, self.neu])

        # Write the row back through the host copy of Wro.
        self.Wro.copy_to_host()
        self.Wro.numpy_array[saida, :] = Theta.asarray()
        self.Wro.copy_to_device()