def perform(self, node, inputs, out_storage):
    """Run ``blas.gemv`` on the given inputs and store its result.

    Parameters
    ----------
    node
        Not used by this method.
    inputs
        Sequence unpacked as ``(y, alpha, A, x, beta)``.
    out_storage
        Nested container; the result is written to ``out_storage[0][0]``.

    Notes
    -----
    ``self.inplace`` is forwarded as ``overwrite_y`` unless it is truthy
    and ``y.strides[0]`` is negative, in which case ``False`` is passed.
    """
    y, alpha, A, x, beta = inputs

    requested = self.inplace
    # Only inspect the stride when an in-place update was requested.
    # NOTE(review): presumably the backend cannot overwrite a y with a
    # negative leading stride -- confirm against the blas implementation.
    negative_stride = requested and y.strides[0] < 0
    overwrite = False if negative_stride else requested

    result = blas.gemv(alpha, A, x, beta, y, overwrite_y=overwrite)
    out_storage[0][0] = result
def gemv(shp, dtype, order, trans, offseted_i, sliced, overwrite, init_y,
         alpha=1.0, beta=0.0):
    """Check ``gblas.gemv`` against the ``fblas`` reference routine.

    Operands are produced with ``gen_gpuarray``, which returns a pair per
    call (presumably a host array and its device counterpart -- confirm
    against the helper). The first element of each pair feeds
    ``fblas.sgemv``/``fblas.dgemv`` and the second feeds ``gblas.gemv``;
    the two results are compared with ``numpy.testing.assert_allclose``.

    Parameters
    ----------
    shp
        Shape used for the matrix ``A``; ``shp[0]`` and ``shp[1]`` are read.
    dtype
        ``'float32'`` selects ``fblas.sgemv``; anything else selects
        ``fblas.dgemv``.
    order
        Forwarded to ``gen_gpuarray`` when generating ``A``.
    trans
        When truthy, ``x`` has length ``shp[0]`` and ``y`` length ``shp[1]``;
        otherwise the lengths are swapped. Also forwarded as ``trans`` to
        the reference routine and as ``trans_a`` to ``gblas.gemv``.
    offseted_i, sliced
        Forwarded to ``gen_gpuarray`` for ``A`` and ``x`` (not for ``y``).
    overwrite
        Forwarded as ``overwrite_y`` to both implementations.
    init_y
        When truthy, a ``y`` operand is generated; otherwise ``None`` is
        passed as ``y`` to both implementations.
    alpha, beta
        Scalars forwarded unchanged to both implementations.

    Raises
    ------
    AssertionError
        From ``assert_allclose`` when the results differ beyond
        ``rtol=1e-6``.
    """
    # Operands are generated in the order A, x, (y) -- kept as-is in case
    # gen_gpuarray draws from a shared random stream.
    host_a, dev_a = gen_gpuarray(shp, dtype, order=order,
                                 offseted_inner=offseted_i,
                                 sliced=sliced, ctx=context)

    # Vector lengths depend on whether A is applied transposed.
    x_shape, y_shape = (
        ((shp[0],), (shp[1],)) if trans else ((shp[1],), (shp[0],))
    )

    host_x, dev_x = gen_gpuarray(x_shape, dtype,
                                 offseted_inner=offseted_i,
                                 sliced=sliced, ctx=context)

    host_y = dev_y = None
    if init_y:
        host_y, dev_y = gen_gpuarray(y_shape, dtype, ctx=context)

    # Pick the reference routine matching the element type.
    reference_gemv = fblas.sgemv if dtype == 'float32' else fblas.dgemv
    expected = reference_gemv(alpha, host_a, host_x, beta, host_y,
                              trans=trans, overwrite_y=overwrite)

    actual = gblas.gemv(alpha, dev_a, dev_x, beta, dev_y,
                        trans_a=trans, overwrite_y=overwrite)

    numpy.testing.assert_allclose(expected, numpy.asarray(actual), rtol=1e-6)
def perform(self, node, inputs, out_storage):
    """Run ``blas.gemv`` on the given inputs and store its result.

    Parameters
    ----------
    node
        Not used by this method.
    inputs
        Sequence unpacked as ``(y, alpha, A, x, beta)``.
    out_storage
        Nested container; the result is written to ``out_storage[0][0]``.

    Notes
    -----
    ``blas.gemv`` is always called with ``trans=False``, and
    ``self.inplace`` is forwarded unchanged as ``overwrite_y``.
    """
    y, alpha, A, x, beta = inputs
    result = blas.gemv(
        alpha, A, x, beta, y,
        trans=False,
        overwrite_y=self.inplace,
    )
    out_storage[0][0] = result