Пример #1
0
    def jz(self, state):
        """Reload the four ``yp_*`` pixel buffers and run ``_process_jz``.

        Every ``pycuda_yp_*_tilde_pbo`` object is unregistered, the matching
        ``yp_*_tilde_pbo`` handle is handed to ``_pack_texture_into_PBO``
        together with its texture id, and a new ``cuda_gl.BufferObject`` is
        created for each handle afterwards.

        Args:
            state: Not used here; this method issues no refresh or render.

        Returns:
            Whatever ``self._process_jz()`` returns.
        """
        assert self.yp_tilde_pbo is not None
        pixel_count = self.height * self.width
        float_bytes = pixel_count * 4  # 4 bytes per 32-bit float

        # (buffer suffix, texture-id attribute, byte count, GL data type)
        layout = (
            ('tilde', 'texid', pixel_count, GL_UNSIGNED_BYTE),
            ('fx_tilde', 'tex_fx_id', float_bytes, GL_FLOAT),
            ('fy_tilde', 'tex_fy_id', float_bytes, GL_FLOAT),
            ('m_tilde', 'tex_m_id', pixel_count, GL_UNSIGNED_BYTE),
        )

        # Drop all four CUDA registrations before any buffer is repacked
        for suffix, _, _, _ in layout:
            getattr(self, 'pycuda_yp_%s_pbo' % suffix).unregister()

        for suffix, tex_attr, nbytes, gl_type in layout:
            self._pack_texture_into_PBO(getattr(self, 'yp_%s_pbo' % suffix),
                                        getattr(self, tex_attr), nbytes,
                                        gl_type)

        # Register the repacked buffers with CUDA again
        for suffix, _, _, _ in layout:
            handle = getattr(self, 'yp_%s_pbo' % suffix)
            setattr(self, 'pycuda_yp_%s_pbo' % suffix,
                    cuda_gl.BufferObject(long(handle)))

        # Buffers are registered again; hand over to the CUDA side
        return self._process_jz()
Пример #2
0
def create_PBOs(w,h):
    """Create the source and destination buffers and register both with CUDA.

    Each buffer is filled with ``w*h`` zeroed rows of four ``uint8`` values.
    The GL handles are stored in the module globals ``source_pbo`` and
    ``dest_pbo``; the matching ``cuda_gl.BufferObject`` instances are stored
    in ``pycuda_source_pbo`` and ``pycuda_dest_pbo``.
    """
    global source_pbo, dest_pbo, pycuda_source_pbo, pycuda_dest_pbo

    def _uploaded_buffer(contents):
        # Generate one GL buffer, upload `contents`, then unbind the target.
        handle = glGenBuffers(1)
        glBindBuffer(GL_ARRAY_BUFFER, handle)
        glBufferData(GL_ARRAY_BUFFER, contents, GL_DYNAMIC_DRAW)
        glBindBuffer(GL_ARRAY_BUFFER, 0)
        return handle

    blank_texels = numpy.zeros((w*h, 4), numpy.uint8)

    source_pbo = _uploaded_buffer(blank_texels)
    pycuda_source_pbo = cuda_gl.BufferObject(long(source_pbo))

    dest_pbo = _uploaded_buffer(blank_texels)
    pycuda_dest_pbo = cuda_gl.BufferObject(long(dest_pbo))
Пример #3
0
def process_image():
    """Read the framebuffer into the source PBO, run CUDA, refresh the texture.

    Steps, in order: unregister the source buffer from CUDA, read
    ``current_size`` pixels into it with ``glReadPixels``, register it again,
    call ``process(width, height)``, then update ``output_texture`` from
    ``dest_pbo`` with ``glTexSubImage2D``.
    """
    # Only this global is rebound; the others are read-only in this function.
    global pycuda_source_pbo

    width, height = current_size
    assert source_pbo is not None

    # CUDA must let go of the buffer before GL writes into it
    pycuda_source_pbo.unregister()

    # Pixel reads now land in the source PBO
    glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, long(source_pbo))

    # BGRA is used for optimal performance; the last argument is offset 0
    glReadPixels(0, 0,              # lower-left corner of the region
                 width, height,     # extent of the region
                 GL_BGRA, GL_UNSIGNED_BYTE,
                 ctypes.c_void_p(0))

    # Give the freshly written buffer back to CUDA
    pycuda_source_pbo = cuda_gl.BufferObject(long(source_pbo))

    # CUDA kernel pass
    process(width, height)

    # Pull the result out of the destination PBO into the output texture
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, long(dest_pbo))
    glBindTexture(GL_TEXTURE_2D, output_texture)
    glTexSubImage2D(GL_TEXTURE_2D, 0,
                    0, 0, width, height,
                    GL_BGRA, GL_UNSIGNED_BYTE,
                    ctypes.c_void_p(0))
Пример #4
0
def create_PBOs(w, h):
    """Allocate the destination pixel buffer and register it with CUDA.

    Stores the GL handle in the module global ``dest_pbo`` and the
    ``cuda_gl.BufferObject`` in ``pycuda_dest_pbo``. The buffer is sized to
    ``w * h * 4`` bytes with no initial data and is still bound to
    ``GL_PIXEL_UNPACK_BUFFER`` when the function returns.
    """
    global dest_pbo, pycuda_dest_pbo

    texel_count = w * h
    dest_pbo = glGenBuffers(1)
    target = GL_PIXEL_UNPACK_BUFFER
    glBindBuffer(target, dest_pbo)
    # Four bytes per texel; None means storage only, nothing uploaded
    glBufferData(target, texel_count * 4, None, GL_DYNAMIC_COPY)
    pycuda_dest_pbo = cuda_gl.BufferObject(long(dest_pbo))
Пример #5
0
 def _initializePBO(self, data):
     """Create a GL buffer holding ``data`` and register it with CUDA.

     Args:
         data: Passed unchanged to ``glBufferData`` as the buffer contents.

     Returns:
         A ``(gl_handle, cuda_gl.BufferObject)`` tuple for the new buffer.
     """
     handle = glGenBuffers(1)
     target = GL_ARRAY_BUFFER
     glBindBuffer(target, handle)
     glBufferData(target, data, GL_DYNAMIC_DRAW)
     glBindBuffer(target, 0)  # unbind before registering
     registered = cuda_gl.BufferObject(long(handle))
     return (handle, registered)
Пример #6
0
	def j_multi(self, state, deltaX, ee, label):
		"""Render two perturbed states into the ``yp``/``ypp`` buffers and reduce.

		The rows of ``state.X`` selected by ``ee[:,0]`` are shifted by
		``deltaX``, the state is refreshed and rendered, and the shift is
		subtracted again. The ``yp_*`` pixel buffers are then repacked and
		registered with CUDA. The same sequence follows for ``ee[:,1]`` and
		the ``ypp_*`` buffers before ``_process_j_multi`` is called.

		Args:
			state: Object providing ``X``, ``refresh(label, hess=...)`` and ``render()``.
			deltaX: Amount added to, then subtracted from, the selected entries.
			ee: Indexed as ``ee[:,0]`` and ``ee[:,1]`` to select rows of ``state.X``.
			label: Forwarded to ``state.refresh``.

		Returns:
			Whatever ``self._process_j_multi()`` returns.
		"""
		assert self.yp_tilde_pbo is not None
		pixel_count = self.height*self.width
		float_bytes = pixel_count*4  # 32-bit floats
		rgba_bytes = pixel_count*4  # 32-bit RGBA pixels

		def render_perturbed(column):
			# Shift the selected rows, redraw, then take the shift back out
			rows = ee[:,column]
			state.X[rows,0] = np.squeeze(state.X[rows,0] + deltaX)
			state.refresh(label, hess = True)
			state.render()
			state.X[rows,0] = np.squeeze(state.X[rows,0] - deltaX)

		def reload_buffers(prefix, log_before_pack):
			# (buffer suffix, texture-id attribute, byte count, GL type, extra kwargs)
			layout = (
				('tilde', 'texid', pixel_count, GL_UNSIGNED_BYTE, {}),
				('fx_tilde', 'tex_fx_id', float_bytes, GL_FLOAT, {}),
				('fy_tilde', 'tex_fy_id', float_bytes, GL_FLOAT, {}),
				('m_tilde', 'tex_m_id', rgba_bytes, GL_UNSIGNED_BYTE, {'imageformat': GL_RGBA}),
			)
			# Unregister all four from CUDA before repacking any of them
			for entry in layout:
				getattr(self, 'pycuda_%s_%s_pbo' % (prefix, entry[0])).unregister()
			if log_before_pack:
				logging.debug('---------- Pack texture into PBO')
			for suffix, tex_attr, nbytes, gl_type, extra in layout:
				self._pack_texture_into_PBO(
					getattr(self, '%s_%s_pbo' % (prefix, suffix)),
					getattr(self, tex_attr), nbytes, gl_type, **extra)
			# Create fresh CUDA buffer objects for the repacked handles
			for entry in layout:
				handle = getattr(self, '%s_%s_pbo' % (prefix, entry[0]))
				setattr(self, 'pycuda_%s_%s_pbo' % (prefix, entry[0]),
					cuda_gl.BufferObject(long(handle)))

		# First perturbation -> yp buffers
		render_perturbed(0)
		reload_buffers('yp', False)

		# Second perturbation -> ypp buffers
		render_perturbed(1)
		reload_buffers('ypp', True)

		# Hand over to CUDA
		logging.debug('---------- Perform reduction with CUDA')
		return self._process_j_multi()
Пример #7
0
	def j(self, state, deltaX, i, j):
		"""Render perturbations of entries ``i`` and ``j`` and run ``_process_j``.

		``state.X[i,0]`` is increased by ``deltaX``, the state is refreshed and
		rendered, and the increment is removed again. The ``yp_*`` pixel
		buffers are then repacked and registered with CUDA. The same sequence
		is repeated for ``state.X[j,0]`` and the ``ypp_*`` buffers.

		Args:
			state: Object providing ``X``, ``refresh()`` and ``render()``.
			deltaX: Amount added to, then subtracted from, the chosen entry.
			i: First-axis index into ``state.X`` for the first perturbation.
			j: First-axis index into ``state.X`` for the second perturbation.

		Returns:
			Whatever ``self._process_j()`` returns.
		"""
		assert self.yp_tilde_pbo is not None
		pixel_count = self.height*self.width
		float_bytes = pixel_count*4  # 32-bit floats
		rgba_bytes = pixel_count*4  # 32-bit RGBA pixels

		def render_perturbed(row):
			# Nudge one entry, redraw, then undo the nudge
			state.X[row,0] += deltaX
			state.refresh()
			state.render()
			state.X[row,0] -= deltaX

		def reload_buffers(prefix):
			# (buffer suffix, texture-id attribute, byte count, GL type, extra kwargs)
			layout = (
				('tilde', 'texid', pixel_count, GL_UNSIGNED_BYTE, {}),
				('fx_tilde', 'tex_fx_id', float_bytes, GL_FLOAT, {}),
				('fy_tilde', 'tex_fy_id', float_bytes, GL_FLOAT, {}),
				('m_tilde', 'tex_m_id', rgba_bytes, GL_UNSIGNED_BYTE, {'imageformat': GL_RGBA}),
			)
			# Unregister all four from CUDA before repacking any of them
			for entry in layout:
				getattr(self, 'pycuda_%s_%s_pbo' % (prefix, entry[0])).unregister()
			for suffix, tex_attr, nbytes, gl_type, extra in layout:
				self._pack_texture_into_PBO(
					getattr(self, '%s_%s_pbo' % (prefix, suffix)),
					getattr(self, tex_attr), nbytes, gl_type, **extra)
			# Create fresh CUDA buffer objects for the repacked handles
			for entry in layout:
				handle = getattr(self, '%s_%s_pbo' % (prefix, entry[0]))
				setattr(self, 'pycuda_%s_%s_pbo' % (prefix, entry[0]),
					cuda_gl.BufferObject(long(handle)))

		# First perturbation -> yp buffers
		render_perturbed(i)
		reload_buffers('yp')

		# Second perturbation -> ypp buffers
		render_perturbed(j)
		reload_buffers('ypp')

		# Hand over to CUDA
		return self._process_j()
Пример #8
0
def initData(fn=None):
    """Set up the CUDA array, the pixel buffer object and the display texture.

    Rebinds the module globals ``array``, ``pbo_buffer`` and
    ``cuda_pbo_resource``. Reads ``pixels``, ``imWidth``, ``imHeight``,
    ``texid`` and ``ver2011`` from module scope.

    Args:
        fn: Not used by this function.
    """
    # Only the names rebound below need declaring; the rest are read-only here.
    global array, pbo_buffer, cuda_pbo_resource

    # CUDA array from the pixel matrix, "C" order (row-major, not Fortran)
    array = cuda_driver.matrix_to_array(pixels, "C")

    # Zero the host-side pixels now that the CUDA array exists
    pixels.fill(0)

    # One buffer object, bound as the pixel-unpack buffer and allocated
    pbo_buffer = glGenBuffers(1)
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_buffer)
    glBufferData(GL_PIXEL_UNPACK_BUFFER, imWidth * imHeight, pixels,
                 GL_STREAM_DRAW)
    # Confirm the size GL reports matches what was requested
    allocated = glGetBufferParameteriv(GL_PIXEL_UNPACK_BUFFER, GL_BUFFER_SIZE)
    assert (allocated == imWidth * imHeight)
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0)

    # Expose the GL buffer to CUDA through whichever API the flag selects
    if not ver2011:
        cuda_pbo_resource = cuda_gl.BufferObject(int(pbo_buffer))
    else:
        cuda_pbo_resource = pycuda.gl.RegisteredBuffer(
            int(pbo_buffer), cuda_gl.graphics_map_flags.WRITE_DISCARD)

    # One luminance texture of the image size, allocated without data
    glGenTextures(1, texid)
    glBindTexture(GL_TEXTURE_2D, texid)
    glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE, imWidth, imHeight, 0,
                 GL_LUMINANCE, GL_UNSIGNED_BYTE, None)
    glBindTexture(GL_TEXTURE_2D, 0)

    # Rows are 1-byte aligned in both transfer directions
    glPixelStorei(GL_UNPACK_ALIGNMENT, 1)
    glPixelStorei(GL_PACK_ALIGNMENT, 1)
Пример #9
0
    def initjacobian(self, y_im_flip, y_flow, y_m_flip, test=False):
        """Load the rendered and observed image, flow and mask data for CUDA.

        For each of the four channels (image, flow x, flow y, mask) the
        rendered texture is passed to ``_pack_texture_into_PBO`` with the
        matching ``y_*_tilde_pbo`` buffer, and the vertically flipped input
        array is uploaded into the matching ``y_*_pbo`` buffer. All eight
        buffers are unregistered from CUDA beforehand and registered again
        afterwards.

        Fixes relative to the previous version:
          * the debug print referenced the undefined name ``y_tilde_pbo``
            instead of ``self.y_tilde_pbo``;
          * the y-flow channel was built from ``y_flow[:, :, 0]`` (the x
            channel) instead of ``y_flow[:, :, 1]``.

        Args:
            y_im_flip: Image array; flipped with ``np.flipud`` before upload.
            y_flow: Array indexed as ``y_flow[:, :, 0]`` (x flow) and
                ``y_flow[:, :, 1]`` (y flow).
            y_m_flip: Mask array; flipped with ``np.flipud`` before upload.
            test: When true, return ``self._process_initjac_test(TEST_IMAGE)``.

        Returns:
            The result of ``_process_initjac_test`` when ``test`` is true,
            otherwise ``None``.
        """
        # Single-argument form prints the same text on Python 2 and Python 3.
        print('initjac: y_tilde_pbo %s' % (self.y_tilde_pbo,))

        y_im = np.flipud(y_im_flip)
        y_m = np.flipud(y_m_flip)
        yfx = np.flipud(y_flow[:, :, 0])
        yfy = np.flipud(y_flow[:, :, 1])  # was channel 0 (copy-paste bug)

        # Same order for unregistering and re-registering: the four rendered
        # ("tilde") buffers first, then the four current-frame buffers.
        buffer_names = (
            'y_tilde_pbo', 'y_fx_tilde_pbo', 'y_fy_tilde_pbo', 'y_m_tilde_pbo',
            'y_im_pbo', 'y_fx_pbo', 'y_fy_pbo', 'y_m_pbo',
        )

        def upload_frame_data(pbo, nbytes, values):
            # Replace the contents of `pbo` with `values` from CPU memory.
            glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, long(pbo))
            glBufferData(GL_PIXEL_PACK_BUFFER_ARB, nbytes, values,
                         GL_STREAM_DRAW)
            glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0)
            glDisable(GL_TEXTURE_2D)

        # Tell CUDA we are going to get into these buffers
        for name in buffer_names:
            getattr(self, 'pycuda_' + name).unregister()

        bytesize = self.height * self.width
        floatsize = 4  # 32-bit
        # NOTE(review): the float buffers are sized for 4-byte elements, so
        # y_flow is presumably float32 -- confirm against the caller.

        # y_im: rendered image, then current-frame image
        self._pack_texture_into_PBO(self.y_tilde_pbo, self.texid, bytesize,
                                    GL_UNSIGNED_BYTE)
        upload_frame_data(self.y_im_pbo, self.width * self.height, y_im)

        # y_fx: rendered x flow, then current-frame x flow
        self._pack_texture_into_PBO(self.y_fx_tilde_pbo, self.tex_fx_id,
                                    bytesize * floatsize, GL_FLOAT)
        upload_frame_data(self.y_fx_pbo,
                          self.width * self.height * floatsize, yfx)

        # y_fy: rendered y flow, then current-frame y flow
        self._pack_texture_into_PBO(self.y_fy_tilde_pbo, self.tex_fy_id,
                                    bytesize * floatsize, GL_FLOAT)
        upload_frame_data(self.y_fy_pbo,
                          self.width * self.height * floatsize, yfy)

        # y_m: rendered mask, then current-frame mask
        self._pack_texture_into_PBO(self.y_m_tilde_pbo, self.tex_m_id,
                                    bytesize, GL_UNSIGNED_BYTE)
        upload_frame_data(self.y_m_pbo, self.width * self.height, y_m)

        # Register every buffer with CUDA again
        for name in buffer_names:
            setattr(self, 'pycuda_' + name,
                    cuda_gl.BufferObject(long(getattr(self, name))))

        # Loaded all into CUDA accessible memory; optionally run the test
        if test:
            return self._process_initjac_test(TEST_IMAGE)
        return None