示例#1
0
    def test_combine(self):
        """
        Test the "combine" (linear combination) kernel.

        Two random float32 matrices and two random float32 coefficients are
        sent through the OpenCL ``combine`` kernel; the result read back from
        the device must agree with the ``my_combine`` reference to within 1e-4.
        """
        width = numpy.int32(157)
        height = numpy.int32(147)
        coeff1 = numpy.random.rand(1)[0].astype(numpy.float32)
        coeff2 = numpy.random.rand(1)[0].astype(numpy.float32)
        mat1 = numpy.random.rand(height, width).astype(numpy.float32)
        mat2 = numpy.random.rand(height, width).astype(numpy.float32)

        gpu_mat1 = pyopencl.array.to_device(queue, mat1)
        gpu_mat2 = pyopencl.array.to_device(queue, mat2)
        gpu_out = pyopencl.array.empty(queue,
                                       mat1.shape,
                                       dtype=numpy.float32,
                                       order="C")
        # calc_size is given (width, height), i.e. the reverse of the numpy
        # (height, width) shape used for the host arrays.
        shape = calc_size((width, height), self.wg)

        t0 = time.time()
        k1 = self.program.combine(queue, shape, self.wg, gpu_mat1.data, coeff1,
                                  gpu_mat2.data, coeff2, gpu_out.data,
                                  numpy.int32(0), width, height)
        res = gpu_out.get()
        t1 = time.time()
        ref = my_combine(mat1, coeff1, mat2, coeff2)
        t2 = time.time()

        delta = abs(ref - res).max()
        logger.info("delta=%s", delta)
        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(delta < 1e-4, "delta=%s" % delta)
        if PROFILE:
            # (t2 - t1) brackets the reference computation, (t1 - t0) the
            # kernel launch plus the read-back of the result.
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.",
                        1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Linear combination took %.3fms",
                        1e-6 * (k1.profile.end - k1.profile.start))
示例#2
0
 def setUp(self):
     """
     Prepare the input image, the OpenCL programs and the scratch buffers.

     The "preprocess.cl" and "reductions.cl" sources are read from the
     directory holding the ``sift`` module and built against ``ctx``.
     """
     # NOTE(review): scipy.misc.lena is not shipped by recent SciPy
     # releases -- confirm the SciPy version this test targets.
     self.input = numpy.ascontiguousarray(scipy.misc.lena()[:510, :511])
     self.gpudata = pyopencl.array.empty(queue,
                                         self.input.shape,
                                         dtype=numpy.float32,
                                         order="C")

     sift_dir = os.path.dirname(os.path.abspath(sift.__file__))
     # Read the kernel sources inside "with" so the file handles are closed
     # deterministically instead of being left to the garbage collector.
     with open(os.path.join(sift_dir, "preprocess.cl")) as kernel_file:
         kernel_src = kernel_file.read()
     with open(os.path.join(sift_dir, "reductions.cl")) as reduct_file:
         reduct_src = reduct_file.read()
     self.program = pyopencl.Program(ctx, kernel_src).build()
     self.reduction = pyopencl.Program(ctx, reduct_src).build()

     self.IMAGE_W = numpy.int32(self.input.shape[-1])
     self.IMAGE_H = numpy.int32(self.input.shape[0])
     self.wg = (32, 16)  # other values tried: (256, 2), (2, 256)
     self.shape = calc_size((self.IMAGE_W, self.IMAGE_H), self.wg)
     # Note: if wg < output size, weird results are expected!
     self.binning = (4, 2)
     self.red_size = 128  # reduction size
     self.twofivefive = pyopencl.array.to_device(
         queue, numpy.array([255], numpy.float32))
     # Temporary buffer for the max/min reduction.
     self.buffers_max_min = pyopencl.array.empty(
         queue, (self.red_size, 2), dtype=numpy.float32)
     # One-element buffers; the shape is spelled as an explicit tuple.
     self.buffers_min = pyopencl.array.empty(queue, (1,),
                                             dtype=numpy.float32)
     self.buffers_max = pyopencl.array.empty(queue, (1,),
                                             dtype=numpy.float32)
示例#3
0
    def normalize(self, raw, flat1, flat2):
        '''
        Run the OpenCL "correction" kernel on one image and return the result.

        raw, flat1 and flat2 are uploaded to the device together with
        self.dark_data and self.dark_ref; the output is a float32 array with
        the same (rows, columns) shape as raw.
        NOTE: images are passed as numpy arrays, so any file reading
        (edf/hdf5) has to happen before calling this function.
        '''
        n_rows, n_cols = raw.shape
        global_size = calc_size((n_cols, n_rows), self.wg)

        # Upload order matches the kernel argument order:
        # raw, the two dark arrays held on the instance, then the two flats.
        host_inputs = (raw, self.dark_data, self.dark_ref, flat1, flat2)
        device_inputs = [pyopencl.array.to_device(self.queue, host_array)
                         for host_array in host_inputs]
        result_gpu = pyopencl.array.empty(self.queue, (n_rows, n_cols),
                                          dtype=numpy.float32, order="C")

        kernel_args = [device_array.data for device_array in device_inputs]
        kernel_args.append(result_gpu.data)
        # The kernel receives the width first, then the height, as int32.
        kernel_args.extend(numpy.int32((n_cols, n_rows)))

        self.program.correction(self.queue, global_size, self.wg,
                                *kernel_args)
        return result_gpu.get()
示例#4
0
    def test_combine(self):
        """
        Check the "combine" (linear combination) kernel against Python.

        Random float32 inputs (two 147x157 matrices, two scalar coefficients)
        go through ``self.program.combine``; the largest absolute difference
        with ``my_combine`` must stay below 1e-4.
        """
        width = numpy.int32(157)
        height = numpy.int32(147)
        coeff1 = numpy.random.rand(1)[0].astype(numpy.float32)
        coeff2 = numpy.random.rand(1)[0].astype(numpy.float32)
        mat1 = numpy.random.rand(height, width).astype(numpy.float32)
        mat2 = numpy.random.rand(height, width).astype(numpy.float32)

        gpu_mat1 = pyopencl.array.to_device(queue, mat1)
        gpu_mat2 = pyopencl.array.to_device(queue, mat2)
        gpu_out = pyopencl.array.empty(queue, mat1.shape, dtype=numpy.float32, order="C")
        # Global size is computed from (width, height), not the numpy shape.
        shape = calc_size((width, height), self.wg)

        t0 = time.time()
        k1 = self.program.combine(queue, shape, self.wg,
                                  gpu_mat1.data, coeff1, gpu_mat2.data, coeff2,
                                  gpu_out.data, numpy.int32(0),
                                  width, height)
        res = gpu_out.get()
        t1 = time.time()
        ref = my_combine(mat1, coeff1, mat2, coeff2)
        t2 = time.time()

        delta = abs(ref - res).max()
        logger.info("delta=%s", delta)
        # assert_ is a deprecated unittest alias; assertTrue is the supported name.
        self.assertTrue(delta < 1e-4, "delta=%s" % delta)
        if PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.",
                        1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Linear combination took %.3fms",
                        1e-6 * (k1.profile.end - k1.profile.start))
示例#5
0
    def test_gradient(self):
        """
        Test the gradient kernel (norm and orientation).

        A random float32 matrix of 14 rows by 15 columns is processed by the
        ``compute_gradient_orientation`` kernel; both outputs must match the
        ``my_gradient`` reference to within 1e-4.
        """
        self.width = numpy.int32(15)
        self.height = numpy.int32(14)

        self.mat = numpy.random.rand(self.height, self.width).astype(numpy.float32)
        self.gpu_mat = pyopencl.array.to_device(queue, self.mat)
        self.gpu_grad = pyopencl.array.empty(queue, self.mat.shape, dtype=numpy.float32, order="C")
        self.gpu_ori = pyopencl.array.empty(queue, self.mat.shape, dtype=numpy.float32, order="C")
        # calc_size takes (width, height) like the other kernel launches in
        # this file; self.mat.shape is (height, width), which is the wrong
        # order for a non-square matrix.
        self.shape = calc_size((self.width, self.height), self.wg)

        t0 = time.time()
        k1 = self.program.compute_gradient_orientation(
            queue, self.shape, self.wg,
            self.gpu_mat.data, self.gpu_grad.data, self.gpu_ori.data,
            self.width, self.height)
        res_norm = self.gpu_grad.get()
        res_ori = self.gpu_ori.get()
        t1 = time.time()
        ref_norm, ref_ori = my_gradient(self.mat)
        t2 = time.time()

        delta_norm = abs(ref_norm - res_norm).max()
        delta_ori = abs(ref_ori - res_ori).max()
        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(delta_norm < 1e-4, "delta_norm=%s" % delta_norm)
        self.assertTrue(delta_ori < 1e-4, "delta_ori=%s" % delta_ori)
        logger.info("delta_norm=%s", delta_norm)
        logger.info("delta_ori=%s", delta_ori)

        if PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.",
                        1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Gradient computation took %.3fms",
                        1e-6 * (k1.profile.end - k1.profile.start))
示例#6
0
    def test_local_maxmin(self):
        """
        Test the local maximum/minimum detection kernel.

        Inputs come from ``local_maxmin_setup()``. The kernel output is
        compared with ``my_local_maxmin`` column by column (columns 0, 1, 2),
        each column being sorted first because the order in which the GPU
        writes the keypoints is unknown.
        """
        # local_maxmin_setup :
        border_dist, peakthresh, EdgeThresh, EdgeThresh0, octsize, s, nb_keypoints, width, height, DOGS, g = local_maxmin_setup()
        self.s = numpy.int32(s)  # 1, 2, 3 ... not 4 nor 0.
        self.gpu_dogs = pyopencl.array.to_device(queue, DOGS)
        self.output = pyopencl.array.empty(queue, (nb_keypoints, 4), dtype=numpy.float32, order="C")
        self.output.fill(-1.0, queue)  # memset for invalid keypoints
        self.counter = pyopencl.array.zeros(queue, (1,), dtype=numpy.int32, order="C")
        nb_keypoints = numpy.int32(nb_keypoints)
        # DOGS is 3D: the first and last axes are folded into one dimension.
        self.shape = calc_size((DOGS.shape[1], DOGS.shape[0] * DOGS.shape[2]), self.wg)

        t0 = time.time()
        k1 = self.program.local_maxmin(queue, self.shape, self.wg,
                                       self.gpu_dogs.data, self.output.data,
                                       border_dist, peakthresh, octsize, EdgeThresh0, EdgeThresh,
                                       self.counter.data, nb_keypoints, self.s, width, height)

        res = self.output.get()
        self.keypoints1 = self.output  # for further use
        self.actual_nb_keypoints = self.counter.get()[0]  # for further use

        t1 = time.time()
        ref, actual_nb_keypoints2 = my_local_maxmin(DOGS, peakthresh, border_dist, octsize,
                                                    EdgeThresh0, EdgeThresh, nb_keypoints, self.s, width, height)
        t2 = time.time()

        # We have to sort the arrays, since the order of the peaks is unknown on the GPU.
        res_peaks = res[(res[:, 0].argsort(axis=0)), 0]
        ref_peaks = ref[(ref[:, 0].argsort(axis=0)), 0]
        res_r = res[(res[:, 1].argsort(axis=0)), 1]
        ref_r = ref[(ref[:, 1].argsort(axis=0)), 1]
        res_c = res[(res[:, 2].argsort(axis=0)), 2]
        ref_c = ref[(ref[:, 2].argsort(axis=0)), 2]
        delta_peaks = abs(ref_peaks - res_peaks).max()
        delta_r = abs(ref_r - res_r).max()
        delta_c = abs(ref_c - res_c).max()

        if PRINT_KEYPOINTS:
            print("keypoints after 2 steps of refinement: (s= %s, octsize=%s) %s" % (self.s, octsize, self.actual_nb_keypoints))
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(res[0:self.actual_nb_keypoints])

        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(delta_peaks < 1e-4, "delta_peaks=%s" % delta_peaks)
        self.assertTrue(delta_r < 1e-4, "delta_r=%s" % delta_r)
        self.assertTrue(delta_c < 1e-4, "delta_c=%s" % delta_c)
        logger.info("delta_peaks=%s", delta_peaks)
        logger.info("delta_r=%s", delta_r)
        logger.info("delta_c=%s", delta_c)

        if PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.",
                        1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Local extrema search took %.3fms",
                        1e-6 * (k1.profile.end - k1.profile.start))
示例#7
0
    def test_orientation(self):
        '''
        Test the keypoints orientation assignment kernel.

        Inputs come from ``orientation_setup()``. The keypoints read back
        from the device are compared with the ``my_orientation`` reference
        through ``keypoints_compare``; each of the four returned deltas must
        be below 1e-4.
        '''
        # orientation_setup :
        keypoints, nb_keypoints, updated_nb_keypoints, grad, ori, octsize = orientation_setup()
        # Snapshot of the input keypoints, taken before the kernel and the
        # reference run (only used by the commented-out debug print below).
        keypoints_before_orientation = numpy.copy(keypoints)
        # 1D launch: a one-element work-group tuple built from the largest
        # dimension of self.wg.
        wg = max(self.wg),
        shape = calc_size((keypoints.shape[0],), wg)
        gpu_keypoints = pyopencl.array.to_device(queue, keypoints)
        actual_nb_keypoints = numpy.int32(updated_nb_keypoints)
        print("Max. number of keypoints before orientation assignment : %s" % actual_nb_keypoints)

        gpu_grad = pyopencl.array.to_device(queue, grad)
        gpu_ori = pyopencl.array.to_device(queue, ori)
        orisigma = numpy.float32(1.5)  # SIFT
        grad_height, grad_width = numpy.int32(grad.shape)
        keypoints_start = numpy.int32(0)
        keypoints_end = numpy.int32(actual_nb_keypoints)
        # The device counter starts at the current number of keypoints.
        counter = pyopencl.array.to_device(queue, keypoints_end)

        t0 = time.time()
        k1 = self.program.orientation_assignment(queue, shape, wg,
                                                 gpu_keypoints.data, gpu_grad.data, gpu_ori.data, counter.data,
                                                 octsize, orisigma, nb_keypoints, keypoints_start, keypoints_end,
                                                 grad_width, grad_height)
        res = gpu_keypoints.get()
        cnt = counter.get()
        t1 = time.time()

        ref, updated_nb_keypoints = my_orientation(keypoints, nb_keypoints, keypoints_start, keypoints_end,
                                                   grad, ori, octsize, orisigma)

        t2 = time.time()

        # print(keypoints_before_orientation[0:33])
        if PRINT_KEYPOINTS:
            print("Keypoints after orientation assignment :")
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(res[0:actual_nb_keypoints])

        print("Total keypoints for kernel : %s -- For Python : %s \t [octsize = %s]" % (cnt, updated_nb_keypoints, octsize))

        # Sort to compare added keypoints.
        d1, d2, d3, d4 = keypoints_compare(ref, res)
        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(d1 < 1e-4, "delta_cols=%s" % d1)
        self.assertTrue(d2 < 1e-4, "delta_rows=%s" % d2)
        self.assertTrue(d3 < 1e-4, "delta_sigma=%s" % d3)
        self.assertTrue(d4 < 1e-4, "delta_angle=%s" % d4)
        logger.info("delta_cols=%s", d1)
        logger.info("delta_rows=%s", d2)
        logger.info("delta_sigma=%s", d3)
        logger.info("delta_angle=%s", d4)
        if PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.",
                        1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Orientation assignment took %.3fms",
                        1e-6 * (k1.profile.end - k1.profile.start))
示例#8
0
    def test_compact(self):
        """
        Test the "compact" kernel.

        10000 random keypoints are generated; roughly a quarter of them are
        overwritten with (-1, -1, i, -1). Column 2 always holds the row
        index, which is what allows kernel and reference outputs to be
        sorted into the same order before they are compared.
        """
        nbkeypoints = 10000  # constant value
        keypoints = numpy.random.rand(nbkeypoints, 4).astype(numpy.float32)
        nb_ones = 0
        for i in range(0, nbkeypoints):
            if numpy.random.rand(1)[0] < 0.25:
                keypoints[i] = (-1, -1, i, -1)
                nb_ones += 1
            else:
                keypoints[i, 2] = i

        gpu_keypoints = pyopencl.array.to_device(queue, keypoints)
        output = pyopencl.array.empty(queue, (nbkeypoints, 4),
                                      dtype=numpy.float32,
                                      order="C")
        output.fill(-1.0, queue)
        counter = pyopencl.array.zeros(queue, (1, ),
                                       dtype=numpy.int32,
                                       order="C")
        # 1D launch: a one-element work-group tuple built from the largest
        # dimension of self.wg.
        wg = max(self.wg),
        shape = calc_size((keypoints.shape[0], ), wg)
        nbkeypoints = numpy.int32(nbkeypoints)
        startkeypoints = numpy.int32(0)
        t0 = time.time()
        k1 = self.program.compact(queue, shape, wg, gpu_keypoints.data,
                                  output.data, counter.data, startkeypoints,
                                  nbkeypoints)
        res = output.get()
        if PRINT_KEYPOINTS:
            print(res)
        count = counter.get()[0]
        t1 = time.time()
        ref, count_ref = my_compact(keypoints, nbkeypoints)
        t2 = time.time()

        print("Kernel counter : %s / Python counter : %s / True value : %s" %
              (count, count_ref, nbkeypoints - nb_ones))

        # Order both results by the index stored in column 2.
        res_sort_arg = res[:, 2].argsort(axis=0)
        res_sort = res[res_sort_arg]
        ref_sort_arg = ref[:, 2].argsort(axis=0)
        ref_sort = ref[ref_sort_arg]
        if PRINT_KEYPOINTS:
            print("Delta matrix :")
            # The whole expression is wrapped so that the count of differing
            # entries is what gets printed, whether print is a statement or
            # a function.
            print((abs(res_sort - ref_sort) > 1e-5).sum())
        delta = abs(res_sort - ref_sort).max()
        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(delta < 1e-5, "delta=%s" % delta)
        # The message is only shown on failure, so it describes the failure.
        self.assertEqual(count, count_ref, "counters differ")
        logger.info("delta=%s", delta)
        if PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.",
                        1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Compact operation took %.3fms",
                        1e-6 * (k1.profile.end - k1.profile.start))
示例#9
0
    def siftAlign(self):
        '''
        Call SIFT to align the images found in self.save_folder.

        The first entry is used as the template: its keypoints are computed
        once. For every following entry the keypoints are matched against
        the first ones, self.matchingCorrection turns the match into a
        correction (2x2 matrix plus offset) and the "transform" OpenCL kernel
        is run with it.
        Assumes that all the images have the same dimensions!
        '''

        mp = sift.MatchPlan(devicetype=self.devicetype)

        # TODO: place the following in a separate routine (in SIFT module ?)
        kernel_path = "openCL/transform.cl"
        # "with" closes the kernel source file as soon as it has been read.
        with open(kernel_path) as kernel_file:
            kernel_src = kernel_file.read()
        program = pyopencl.Program(
            self.ctx,
            kernel_src).build()  # .build('-D WORKGROUP_SIZE=%s' % wg_size)
        wg = 8, 8  # FIXME: hard-coded

        # NOTE(review): os.listdir yields bare entry names -- confirm that
        # SiftPlan / plan.keypoints / imageReshape accept them as given.
        i = 0
        for img in os.listdir(self.save_folder):
            if i == 0:  # compute SIFT keypoints on the first image
                i = 1
                plan = sift.SiftPlan(template=img, devicetype=self.devicetype)
                kp_first = plan.keypoints(img)
            else:
                kp = plan.keypoints(img)
                m = mp.match(kp_first, kp)
                sol = self.matchingCorrection(m)

                # sol is indexed as a column: rows 0-1 and 3-4 fill the 2x2
                # matrix, rows 2 and 5 are the two offsets.
                correction_matrix = numpy.zeros((2, 2), dtype=numpy.float32)
                correction_matrix[0] = sol[0:2, 0]
                correction_matrix[1] = sol[3:5, 0]
                matrix_for_gpu = correction_matrix.reshape(
                    4, 1)  # for float4 struct
                # NOTE(review): offset_value (and fill_value / mode below)
                # are not defined in this method -- presumably module-level
                # names; confirm, otherwise this raises NameError.
                offset_value[0] = sol[2, 0]
                offset_value[1] = sol[5, 0]

                img, image_height, image_width = self.imageReshape(img)
                gpu_image = pyopencl.array.to_device(self.queue, img)
                gpu_output = pyopencl.array.empty(self.queue,
                                                  (image_height, image_width),
                                                  dtype=numpy.float32,
                                                  order="C")
                gpu_matrix = pyopencl.array.to_device(self.queue,
                                                      matrix_for_gpu)
                gpu_offset = pyopencl.array.to_device(self.queue, offset_value)
                image_height, image_width = numpy.int32(
                    (image_height, image_width))
                output_height, output_width = image_height, image_width

                # The global size is computed once, on the first image that
                # is transformed, and reused afterwards (same dimensions).
                if i == 1:
                    shape = calc_size((output_width, output_height), wg)
                k1 = program.transform(self.queue, shape, wg, gpu_image.data,
                                       gpu_output.data, gpu_matrix.data,
                                       gpu_offset.data, image_width,
                                       image_height, output_width,
                                       output_height, fill_value, mode)
                # The result is read back but not stored anywhere yet (the
                # save call below is disabled).
                res = gpu_output.get()

                #                scipy.misc.imsave(self.aligned_folder + "/frame" + str(i) +".png", res)
                i += 1
示例#10
0
    def test_interpolation(self):
        """
        Test the keypoints interpolation kernel.

        All inputs come from ``interpolation_setup()``. The ``interp_keypoint``
        kernel is run on the uploaded keypoints and the result is read back
        from that same buffer; the reference is built by calling
        ``my_interp_keypoint`` for each of the first ``nb_keypoints`` rows.
        Rows whose second column equals -1 are dropped on both sides before
        the comparison.
        """

        # interpolation_setup :
        border_dist, peakthresh, EdgeThresh, EdgeThresh0, octsize, nb_keypoints, actual_nb_keypoints, width, height, DOGS, s, keypoints_prev, blur = interpolation_setup(
        )

        # actual_nb_keypoints is the number of keypoints returned by "local_maxmin".
        # After the interpolation, it will be reduced, but we can still use it as a boundary.
        shape = calc_size(keypoints_prev.shape, self.wg)
        gpu_dogs = pyopencl.array.to_device(queue, DOGS)
        gpu_keypoints1 = pyopencl.array.to_device(queue, keypoints_prev)
        start_keypoints = numpy.int32(0)
        actual_nb_keypoints = numpy.int32(actual_nb_keypoints)
        # Warning: it must be the same in my_keypoints_interpolation.
        InitSigma = numpy.float32(1.6)
        t0 = time.time()
        k1 = self.program.interp_keypoint(queue, shape, self.wg, gpu_dogs.data,
                                          gpu_keypoints1.data, start_keypoints,
                                          actual_nb_keypoints, peakthresh,
                                          InitSigma, width, height)
        res = gpu_keypoints1.get()

        t1 = time.time()
        # Work on a copy so that keypoints_prev itself is left untouched.
        ref = numpy.copy(keypoints_prev)
        for i, k in enumerate(ref[:nb_keypoints, :]):
            ref[i] = my_interp_keypoint(DOGS, s, k[1], k[2], 5, peakthresh,
                                        width, height)

        t2 = time.time()

        # We have to compare keypoints different from (-1,-1,-1,-1).
        res2 = res[res[:, 1] != -1]
        ref2 = ref[ref[:, 1] != -1]

        if PRINT_KEYPOINTS:
            print("[s=%s]Keypoints before interpolation: %s" %
                  (s, actual_nb_keypoints))
            print("[s=%s]Keypoints after interpolation : %s" %
                  (s, res2.shape[0]))
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(res[0:actual_nb_keypoints])

        delta = abs(ref2 - res2).max()
        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(delta < 1e-4, "delta=%s" % delta)
        logger.info("delta=%s", delta)

        if PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.",
                        1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Keypoints interpolation took %.3fms",
                        1e-6 * (k1.profile.end - k1.profile.start))
示例#11
0
    def test_gradient(self):
        """
        Test the gradient kernel (norm and orientation).

        The input matrix is ``g[1]`` from ``local_maxmin_setup()``. Both
        outputs of the ``compute_gradient_orientation`` kernel must match the
        ``my_gradient`` reference to within 1e-4. When PRINT_KEYPOINTS is
        set, a slice of both norms is printed, the two norm images are
        displayed and the test waits for a key press.
        """

        border_dist, peakthresh, EdgeThresh, EdgeThresh0, octsize, scale, nb_keypoints, width, height, DOGS, g = local_maxmin_setup(
        )
        self.mat = numpy.ascontiguousarray(g[1])
        self.height, self.width = numpy.int32(self.mat.shape)
        self.gpu_mat = pyopencl.array.to_device(queue, self.mat)
        self.gpu_grad = pyopencl.array.empty(queue,
                                             self.mat.shape,
                                             dtype=numpy.float32,
                                             order="C")
        self.gpu_ori = pyopencl.array.empty(queue,
                                            self.mat.shape,
                                            dtype=numpy.float32,
                                            order="C")
        self.shape = calc_size((self.width, self.height), self.wg)

        t0 = time.time()
        k1 = self.program.compute_gradient_orientation(
            queue, self.shape, self.wg, self.gpu_mat.data, self.gpu_grad.data,
            self.gpu_ori.data, self.width, self.height)
        res_norm = self.gpu_grad.get()
        res_ori = self.gpu_ori.get()
        t1 = time.time()
        ref_norm, ref_ori = my_gradient(self.mat)
        t2 = time.time()
        delta_norm = abs(ref_norm - res_norm).max()
        delta_ori = abs(ref_ori - res_ori).max()
        if PRINT_KEYPOINTS:
            rmin, cmin = 0, 0
            rmax, cmax = rmin + 6, cmin + 6

            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(res_norm[-rmax, cmin:cmax])
            print("")
            print(ref_norm[-rmax, cmin:cmax])
            fig = pylab.figure()
            sp1 = fig.add_subplot(121)
            sp1.imshow(res_norm, interpolation="nearest")
            sp2 = fig.add_subplot(122)
            sp2.imshow(ref_norm, interpolation="nearest")
            fig.show()
            # raw_input only exists on Python 2; fall back to input on Python 3.
            try:
                wait_for_key = raw_input
            except NameError:
                wait_for_key = input
            wait_for_key("enter")

        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(delta_norm < 1e-4, "delta_norm=%s" % delta_norm)
        self.assertTrue(delta_ori < 1e-4, "delta_ori=%s" % delta_ori)
        logger.info("delta_norm=%s", delta_norm)
        logger.info("delta_ori=%s", delta_ori)

        if PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.",
                        1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Gradient computation took %.3fms",
                        1e-6 * (k1.profile.end - k1.profile.start))
示例#12
0
    def test_interpolation(self):
        """
        Check the keypoints interpolation kernel against the Python reference.

        ``interpolation_setup()`` supplies every input. The kernel output is
        read back from the keypoint buffer that was uploaded, and the
        reference is obtained by applying ``my_interp_keypoint`` to each of
        the first ``nb_keypoints`` rows of a copy of the input keypoints.
        Only rows whose second column differs from -1 are compared.
        """

        # interpolation_setup :
        border_dist, peakthresh, EdgeThresh, EdgeThresh0, octsize, nb_keypoints, actual_nb_keypoints, width, height, DOGS, s, keypoints_prev, blur = interpolation_setup()

        # actual_nb_keypoints is the number of keypoints returned by "local_maxmin".
        # After the interpolation, it will be reduced, but we can still use it as a boundary.
        shape = calc_size(keypoints_prev.shape, self.wg)
        gpu_dogs = pyopencl.array.to_device(queue, DOGS)
        gpu_keypoints1 = pyopencl.array.to_device(queue, keypoints_prev)
        start_keypoints = numpy.int32(0)
        actual_nb_keypoints = numpy.int32(actual_nb_keypoints)
        InitSigma = numpy.float32(1.6)  # warning: it must be the same in my_keypoints_interpolation
        t0 = time.time()
        k1 = self.program.interp_keypoint(queue, shape, self.wg,
                                          gpu_dogs.data, gpu_keypoints1.data, start_keypoints, actual_nb_keypoints,
                                          peakthresh, InitSigma, width, height)
        res = gpu_keypoints1.get()

        t1 = time.time()
        # The reference is computed on a copy, leaving keypoints_prev intact.
        ref = numpy.copy(keypoints_prev)
        for i, k in enumerate(ref[:nb_keypoints, :]):
            ref[i] = my_interp_keypoint(DOGS, s, k[1], k[2], 5, peakthresh, width, height)

        t2 = time.time()

        # We have to compare keypoints different from (-1,-1,-1,-1).
        res2 = res[res[:, 1] != -1]
        ref2 = ref[ref[:, 1] != -1]

        if PRINT_KEYPOINTS:
            print("[s=%s]Keypoints before interpolation: %s" % (s, actual_nb_keypoints))
            print("[s=%s]Keypoints after interpolation : %s" % (s, res2.shape[0]))
            # Parenthesised so the line is valid on both Python 2 and 3.
            print(res[0:actual_nb_keypoints])

        delta = abs(ref2 - res2).max()
        # assert_ is a deprecated unittest alias; assertTrue is the supported name.
        self.assertTrue(delta < 1e-4, "delta=%s" % delta)
        logger.info("delta=%s", delta)

        if PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.",
                        1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Keypoints interpolation took %.3fms",
                        1e-6 * (k1.profile.end - k1.profile.start))
示例#13
0
    def test_bin(self):
        """
        Test the binning kernel.

        self.input is converted to float32 and binned on the device with the
        factors in self.binning; the result must match
        ``binning(lint, self.binning)`` divided by both binning factors to
        within 1e-6. When PROFILE is set, timings are logged and the input,
        reference, difference and GPU images are displayed before the test
        waits for a key press.
        """
        lint = numpy.ascontiguousarray(self.input, numpy.float32)

        # Output size per axis: input size divided by the factor, rounded up.
        out_shape = tuple(
            int(math.ceil((float(i) / j)))
            for i, j in zip(self.input.shape, self.binning))
        t0 = time.time()
        inp_gpu = pyopencl.array.to_device(queue, lint)
        out_gpu = pyopencl.array.empty(queue,
                                       out_shape,
                                       dtype=numpy.float32,
                                       order="C")
        # Sizes and binning factors are handed over in reversed axis order
        # (index 1 first, then index 0).
        k1 = self.program.bin(queue,
                              calc_size((out_shape[1], out_shape[0]), self.wg),
                              self.wg, inp_gpu.data, out_gpu.data,
                              numpy.int32(self.binning[1]),
                              numpy.int32(self.binning[0]),
                              numpy.int32(lint.shape[1]),
                              numpy.int32(lint.shape[0]),
                              numpy.int32(out_shape[1]),
                              numpy.int32(out_shape[0]))
        res = out_gpu.get()
        t1 = time.time()
        ref = binning(lint, self.binning) / self.binning[0] / self.binning[1]
        t2 = time.time()
        delta = abs(ref - res).max()
        if PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.",
                        1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Binning took %.3fms",
                        1e-6 * (k1.profile.end - k1.profile.start))
            fig = pylab.figure()
            fig.suptitle('Binning by %s,%s' % self.binning)
            sp1 = fig.add_subplot(221)
            sp1.imshow(lint, interpolation="nearest")
            sp1.set_title("Input")
            sp2 = fig.add_subplot(222)
            sp2.imshow(ref, interpolation="nearest")
            sp2.set_title("Reference")
            sp3 = fig.add_subplot(223)
            sp3.imshow(ref - res, interpolation="nearest")
            sp3.set_title("Delta= %s" % delta)
            sp4 = fig.add_subplot(224)
            sp4.imshow(res, interpolation="nearest")
            sp4.set_title("GPU")
            fig.show()
            # raw_input only exists on Python 2; fall back to input on Python 3.
            try:
                wait_for_key = raw_input
            except NameError:
                wait_for_key = input
            wait_for_key("enter")
        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(delta < 1e-6, "delta=%s" % delta)
0
 def setUp(self):
     """
     Prepare the input image, the "preprocess.cl" program and launch sizes.

     The kernel source is read from the directory holding the ``sift``
     module and built against ``ctx``.
     """
     # NOTE(review): scipy.misc.lena is not shipped by recent SciPy
     # releases -- confirm the SciPy version this test targets.
     self.input = scipy.misc.lena()
     self.gpudata = pyopencl.array.empty(queue, self.input.shape, dtype=numpy.float32, order="C")
     kernel_path = os.path.join(os.path.dirname(os.path.abspath(sift.__file__)), "preprocess.cl")
     # "with" closes the kernel source file as soon as it has been read.
     with open(kernel_path) as kernel_file:
         kernel_src = kernel_file.read()
     self.program = pyopencl.Program(ctx, kernel_src).build()
     self.IMAGE_W = numpy.int32(self.input.shape[-1])
     self.IMAGE_H = numpy.int32(self.input.shape[0])
     self.wg = (2, 256)
     # calc_size takes (width, height) like the other kernel launches in
     # this file; the numpy shape is (height, width).
     self.shape = calc_size((self.IMAGE_W, self.IMAGE_H), self.wg)
     # Note: if wg < output size, weird results are expected!
     # A (4, 2) binning used to be assigned first and was immediately
     # overwritten; only the effective value is kept.
     self.binning = (2, 2)
     self.twofivefive = pyopencl.array.to_device(queue, numpy.array([255], numpy.float32))
示例#15
0
    def siftAlign(self):
        '''
        Align the images listed in self.save_folder using SIFT.

        Keypoints of the first entry serve as the reference. Each later
        entry is matched against them; self.matchingCorrection converts the
        match into a 2x2 matrix and an offset, which are passed to the
        "transform" OpenCL kernel.
        Assumes that all the images have the same dimensions!
        '''

        mp = sift.MatchPlan(devicetype=self.devicetype)

        # TODO: place the following in a separate routine (in SIFT module ?)
        kernel_path = "openCL/transform.cl"
        # Read inside "with" so the file handle is closed right away.
        with open(kernel_path) as kernel_file:
            kernel_src = kernel_file.read()
        program = pyopencl.Program(self.ctx, kernel_src).build()  # .build('-D WORKGROUP_SIZE=%s' % wg_size)
        wg = 8, 8  # FIXME: hard-coded

        # NOTE(review): os.listdir yields bare entry names -- confirm that
        # SiftPlan / plan.keypoints / imageReshape accept them as given.
        i = 0
        for img in os.listdir(self.save_folder):
            if i == 0:  # compute SIFT keypoints on the first image
                i = 1
                plan = sift.SiftPlan(template=img, devicetype=self.devicetype)
                kp_first = plan.keypoints(img)
            else:
                kp = plan.keypoints(img)
                m = mp.match(kp_first, kp)
                sol = self.matchingCorrection(m)

                # Rows 0-1 and 3-4 of the sol column fill the 2x2 matrix,
                # rows 2 and 5 are the two offsets.
                correction_matrix = numpy.zeros((2, 2), dtype=numpy.float32)
                correction_matrix[0] = sol[0:2, 0]
                correction_matrix[1] = sol[3:5, 0]
                matrix_for_gpu = correction_matrix.reshape(4, 1)  # for float4 struct
                # NOTE(review): offset_value (and fill_value / mode below)
                # are not defined in this method -- presumably module-level
                # names; confirm, otherwise this raises NameError.
                offset_value[0] = sol[2, 0]
                offset_value[1] = sol[5, 0]

                img, image_height, image_width = self.imageReshape(img)
                gpu_image = pyopencl.array.to_device(self.queue, img)
                gpu_output = pyopencl.array.empty(self.queue, (image_height, image_width), dtype=numpy.float32, order="C")
                gpu_matrix = pyopencl.array.to_device(self.queue, matrix_for_gpu)
                gpu_offset = pyopencl.array.to_device(self.queue, offset_value)
                image_height, image_width = numpy.int32((image_height, image_width))
                output_height, output_width = image_height, image_width

                # The global size is only computed for the first transformed
                # image and reused afterwards (same dimensions assumed).
                if i == 1:
                    shape = calc_size((output_width, output_height), wg)
                k1 = program.transform(self.queue, shape, wg,
                                       gpu_image.data, gpu_output.data, gpu_matrix.data, gpu_offset.data,
                                       image_width, image_height, output_width, output_height, fill_value, mode)
                # The result is read back but not stored anywhere yet (the
                # save call below is disabled).
                res = gpu_output.get()

                #                scipy.misc.imsave(self.aligned_folder + "/frame" + str(i) +".png", res)
                i += 1
示例#16
0
    def test_compact(self):
        """
        Tests the "compact" kernel.

        Random keypoints are generated and roughly a quarter of them are
        flagged as invalid (-1 in columns 0, 1 and 3). Column 2 always holds
        the row index, so the kernel output and the Python reference
        (my_compact) can both be sorted on that column before being compared.
        The number of keypoints reported by the kernel counter is checked
        against the reference counter as well.
        """

        nbkeypoints = 10000  # constant value
        keypoints = numpy.random.rand(nbkeypoints, 4).astype(numpy.float32)
        nb_ones = 0  # number of keypoints flagged as invalid
        for i in range(0, nbkeypoints):
            if numpy.random.rand(1)[0] < 0.25:
                keypoints[i] = (-1, -1, i, -1)
                nb_ones += 1
            else:
                keypoints[i, 2] = i

        gpu_keypoints = pyopencl.array.to_device(queue, keypoints)
        output = pyopencl.array.empty(queue, (nbkeypoints, 4), dtype=numpy.float32, order="C")
        # rows that the kernel does not write keep the value -1
        output.fill(-1.0, queue)
        counter = pyopencl.array.zeros(queue, (1,), dtype=numpy.int32, order="C")
        wg = max(self.wg),  # 1D workgroup
        shape = calc_size((keypoints.shape[0],), wg)
        nbkeypoints = numpy.int32(nbkeypoints)
        startkeypoints = numpy.int32(0)

        t0 = time.time()
        k1 = self.program.compact(queue, shape, wg,
            gpu_keypoints.data, output.data, counter.data, startkeypoints, nbkeypoints)
        res = output.get()
        count = counter.get()[0]
        t1 = time.time()
        ref, count_ref = my_compact(keypoints, nbkeypoints)
        t2 = time.time()

        # debug output is done outside of the timed section
        if PRINT_KEYPOINTS:
            print(res)

        print("Kernel counter : %s / Python counter : %s / True value : %s" % (count, count_ref, nbkeypoints - nb_ones))

        # sort both results on the index column so that the comparison does not
        # depend on the order of the rows
        res_sort = res[res[:, 2].argsort(axis=0)]
        ref_sort = ref[ref[:, 2].argsort(axis=0)]
        if PRINT_KEYPOINTS:
            print("Delta matrix :")
            print((abs(res_sort - ref_sort) > 1e-5).sum())
        delta = abs(res_sort - ref_sort).max()
        # log before asserting so that the value is recorded even on failure
        logger.info("delta=%s" % delta)
        self.assertTrue(delta < 1e-5, "delta=%s" % (delta))
        self.assertEqual(count, count_ref,
                         "kernel counter (%s) differs from reference counter (%s)" % (count, count_ref))
        if PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms." % (1000.0 * (t2 - t1), 1000.0 * (t1 - t0)))
            logger.info("Compact operation took %.3fms" % (1e-6 * (k1.profile.end - k1.profile.start)))
示例#17
0
    def setUp(self):
        self.input = scipy.misc.lena().astype(numpy.float32)
        self.gpu_in = pyopencl.array.to_device(queue, self.input)
        self.gpu_tmp = pyopencl.array.empty(queue, self.input.shape, dtype=numpy.float32, order="C")
        self.gpu_out = pyopencl.array.empty(queue, self.input.shape, dtype=numpy.float32, order="C")
        kernel_path = os.path.join(os.path.dirname(os.path.abspath(sift.__file__)), "convolution.cl")
        kernel_src = open(kernel_path).read()
#        compile_options = "-D NIMAGE=%i" % self.input.size
#        logger.info("Compiling file %s with options %s" % (kernel_path, compile_options))
#        self.program = pyopencl.Program(ctx, kernel_src).build(options=compile_options)
        self.program = pyopencl.Program(ctx, kernel_src).build()
        self.IMAGE_W = numpy.int32(self.input.shape[-1])
        self.IMAGE_H = numpy.int32(self.input.shape[0])
        self.wg = (2, 256)
        self.shape = calc_size(self.input.shape, self.wg)
示例#18
0
    def setUp(self):
        self.input = scipy.misc.lena().astype(numpy.float32)
        self.input = numpy.ascontiguousarray(self.input[0:507,0:209])
        
        self.gpu_in = pyopencl.array.to_device(queue, self.input)
        self.gpu_tmp = pyopencl.array.empty(queue, self.input.shape, dtype=numpy.float32, order="C")
        self.gpu_out = pyopencl.array.empty(queue, self.input.shape, dtype=numpy.float32, order="C")
        kernel_path = os.path.join(os.path.dirname(os.path.abspath(sift.__file__)), "convolution.cl")
        kernel_src = open(kernel_path).read()
#        compile_options = "-D NIMAGE=%i" % self.input.size
#        logger.info("Compiling file %s with options %s" % (kernel_path, compile_options))
#        self.program = pyopencl.Program(ctx, kernel_src).build(options=compile_options)
        self.program = pyopencl.Program(ctx, kernel_src).build()
        self.IMAGE_W = numpy.int32(self.input.shape[-1])
        self.IMAGE_H = numpy.int32(self.input.shape[0])
        self.wg = (256, 2)
        self.shape = calc_size((self.input.shape[1], self.input.shape[0]), self.wg)
示例#19
0
    def test_gradient(self):
        """
        Tests the gradient kernel (norm and orientation).

        The input matrix is g[1], as returned by local_maxmin_setup(). The norm
        and the orientation computed by the kernel are compared with the ones
        returned by the Python reference my_gradient().
        """

        # only g is used here, the other values are returned by the shared setup
        border_dist, peakthresh, EdgeThresh, EdgeThresh0, octsize, scale, nb_keypoints, width, height, DOGS, g = local_maxmin_setup()
        self.mat = numpy.ascontiguousarray(g[1])
        self.height, self.width = numpy.int32(self.mat.shape)
        self.gpu_mat = pyopencl.array.to_device(queue, self.mat)
        self.gpu_grad = pyopencl.array.empty(queue, self.mat.shape, dtype=numpy.float32, order="C")
        self.gpu_ori = pyopencl.array.empty(queue, self.mat.shape, dtype=numpy.float32, order="C")
        self.shape = calc_size((self.width, self.height), self.wg)

        t0 = time.time()
        k1 = self.program.compute_gradient_orientation(queue, self.shape, self.wg, self.gpu_mat.data, self.gpu_grad.data, self.gpu_ori.data, self.width, self.height)
        res_norm = self.gpu_grad.get()
        res_ori = self.gpu_ori.get()
        t1 = time.time()
        ref_norm, ref_ori = my_gradient(self.mat)
        t2 = time.time()
        delta_norm = abs(ref_norm - res_norm).max()
        delta_ori = abs(ref_ori - res_ori).max()
        if PRINT_KEYPOINTS:
            rmin, cmin = 0, 0
            rmax, cmax = rmin + 6, cmin + 6

            # first values of one row, counted from the bottom of the image
            print(res_norm[-rmax, cmin:cmax])
            print("")
            print(ref_norm[-rmax, cmin:cmax])
            fig = pylab.figure()
            sp1 = fig.add_subplot(121)
            sp1.imshow(res_norm, interpolation="nearest")
            sp2 = fig.add_subplot(122)
            sp2.imshow(ref_norm, interpolation="nearest")
            fig.show()
            raw_input("enter")

        # log before asserting so that the values are recorded even on failure
        logger.info("delta_norm=%s" % delta_norm)
        logger.info("delta_ori=%s" % delta_ori)
        self.assertTrue(delta_norm < 1e-4, "delta_norm=%s" % (delta_norm))
        self.assertTrue(delta_ori < 1e-4, "delta_ori=%s" % (delta_ori))

        if PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms." % (1000.0 * (t2 - t1), 1000.0 * (t1 - t0)))
            logger.info("Gradient computation took %.3fms" % (1e-6 * (k1.profile.end - k1.profile.start)))
示例#20
0
    def setUp(self):
        self.input = numpy.ascontiguousarray(scipy.misc.lena()[:510, :511])
        self.gpudata = pyopencl.array.empty(queue, self.input.shape, dtype=numpy.float32, order="C")
        kernel_path = os.path.join(os.path.dirname(os.path.abspath(sift.__file__)), "preprocess.cl")
        reduct_path = os.path.join(os.path.dirname(os.path.abspath(sift.__file__)), "reductions.cl")
        kernel_src = open(kernel_path).read()
        reduct_src = open(reduct_path).read()
        self.program = pyopencl.Program(ctx, kernel_src).build()
        self.reduction = pyopencl.Program(ctx, reduct_src).build()
        self.IMAGE_W = numpy.int32(self.input.shape[-1])
        self.IMAGE_H = numpy.int32(self.input.shape[0])
        self.wg = (32, 16)#(256, 2) #(32, 16) # (2, 256)
        self.shape = calc_size((self.IMAGE_W, self.IMAGE_H), self.wg)
#        print self.shape
        self.binning = (4, 2) # Nota if wg < ouptup size weired results are expected !
#        self.binning = (2, 2)
        self.red_size = 128 #reduction size
        self.twofivefive = pyopencl.array.to_device(queue, numpy.array([255], numpy.float32))
        self.buffers_max_min = pyopencl.array.empty(queue, (self.red_size, 2), dtype=numpy.float32)  # temporary buffer for max/min reduction
        self.buffers_min = pyopencl.array.empty(queue, (1), dtype=numpy.float32)
        self.buffers_max = pyopencl.array.empty(queue, (1), dtype=numpy.float32)
示例#21
0
    def test_bin(self):
        """
        Test binning kernel
        """
        lint = numpy.ascontiguousarray(self.input, numpy.float32)

        out_shape = tuple(int(math.ceil((float(i) / j))) for i, j in zip(self.input.shape, self.binning))
        t0 = time.time()
        inp_gpu = pyopencl.array.to_device(queue, lint)
        out_gpu = pyopencl.array.empty(queue, out_shape, dtype=numpy.float32, order="C")
        k1 = self.program.bin(queue, calc_size((out_shape[1], out_shape[0]), self.wg), self.wg, inp_gpu.data, out_gpu.data,
                                 numpy.int32(self.binning[1]), numpy.int32(self.binning[0]),
                                 numpy.int32(lint.shape[1]), numpy.int32(lint.shape[0]),
                                 numpy.int32(out_shape[1]), numpy.int32(out_shape[0]))
        res = out_gpu.get()
        t1 = time.time()
        ref = binning(lint, self.binning) / self.binning[0] / self.binning[1]
        t2 = time.time()
#        print ref.shape, res.shape
        delta = abs(ref - res).max()
        if PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms." % (1000.0 * (t2 - t1), 1000.0 * (t1 - t0)))
            logger.info("Binning took %.3fms" % (1e-6 * (k1.profile.end - k1.profile.start)))
            fig = pylab.figure()
            fig.suptitle('Binning by %s,%s' % self.binning)
            sp1 = fig.add_subplot(221)
            sp1.imshow(lint, interpolation="nearest")
            sp1.set_title("Input")
            sp2 = fig.add_subplot(222)
            sp2.imshow(ref, interpolation="nearest")
            sp2.set_title("Reference")
            sp3 = fig.add_subplot(223)
            sp3.imshow(ref - res, interpolation="nearest")
            sp3.set_title("Delta= %s" % delta)
            sp4 = fig.add_subplot(224)
            sp4.imshow(res, interpolation="nearest")
            sp4.set_title("GPU")
            fig.show()
            raw_input("enter")
        self.assert_(delta < 1e-6, "delta=%s" % delta)
示例#22
0
    def test_compact(self):
        """
        Tests the "compact" kernel.

        Roughly three quarters of the random keypoints are replaced by the
        invalid marker (-1, -1, -1, -1). The kernel output and the Python
        reference (my_compact) are sorted on their first column before being
        compared, and the kernel counter is checked against the reference
        counter.
        """

        nbkeypoints = 1000  # constant value
        keypoints = numpy.random.rand(nbkeypoints, 4).astype(numpy.float32)
        for i in range(0, nbkeypoints):
            if numpy.random.rand(1)[0] < 0.75:
                keypoints[i] = (-1, -1, -1, -1)

        self.gpu_keypoints = pyopencl.array.to_device(queue, keypoints)
        self.output = pyopencl.array.empty(queue, (nbkeypoints, 4), dtype=numpy.float32, order="C")
        # rows that the kernel does not write keep the value -1
        self.output.fill(-1.0, queue)
        self.counter = pyopencl.array.zeros(queue, (1,), dtype=numpy.int32, order="C")
        wg = max(self.wg),  # 1D workgroup
        shape = calc_size((keypoints.shape[0],), wg)
        nbkeypoints = numpy.int32(nbkeypoints)

        t0 = time.time()
        k1 = self.program.compact(queue, shape, wg,
            self.gpu_keypoints.data, self.output.data, self.counter.data, nbkeypoints)
        res = self.output.get()
        count = self.counter.get()[0]
        t1 = time.time()
        ref, count_ref = my_compact(keypoints, nbkeypoints)
        t2 = time.time()

        # sort both results so that the comparison does not depend on row order
        res_sort = res[res[:, 0].argsort(axis=0)]
        ref_sort = ref[ref[:, 0].argsort(axis=0)]
        delta = abs(res_sort - ref_sort).max()
        # log before asserting so that the value is recorded even on failure
        logger.info("delta=%s" % delta)
        self.assertTrue(delta < 1e-5, "delta=%s" % (delta))
        # the counter was fetched but never verified
        self.assertEqual(count, count_ref,
                         "kernel counter (%s) differs from reference counter (%s)" % (count, count_ref))
        if PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms." % (1000.0 * (t2 - t1), 1000.0 * (t1 - t0)))
            logger.info("Compact operation took %.3fms" % (1e-6 * (k1.profile.end - k1.profile.start)))
示例#23
0
    def normalize(self, raw, flat1, flat2):
        '''
        Runs the OpenCL "correction" kernel on an image and returns the result.

        NOTE: images are given as numpy arrays, so reading the files (edf/hdf5)
        has to be done before calling this method.

        :param raw: 2D array holding the image to normalize
        :param flat1: first flat-field array, uploaded to the device as-is
        :param flat2: second flat-field array, uploaded to the device as-is
        :return: float32 array with the same shape as raw, read back from the device
        '''

        n_rows, n_cols = raw.shape
        global_size = calc_size((n_cols, n_rows), self.wg)

        # upload the inputs; the two dark arrays are taken from the instance
        dev_raw = pyopencl.array.to_device(self.queue, raw)
        dev_dark_data = pyopencl.array.to_device(self.queue, self.dark_data)
        dev_dark_ref = pyopencl.array.to_device(self.queue, self.dark_ref)
        dev_flat1 = pyopencl.array.to_device(self.queue, flat1)
        dev_flat2 = pyopencl.array.to_device(self.queue, flat2)
        dev_result = pyopencl.array.empty(self.queue, (n_rows, n_cols),
                                          dtype=numpy.float32, order="C")

        # the kernel takes the dimensions as 32-bit integers, width first
        width = numpy.int32(n_cols)
        height = numpy.int32(n_rows)
        event = self.program.correction(
            self.queue, global_size, self.wg,
            dev_raw.data, dev_dark_data.data, dev_dark_ref.data,
            dev_flat1.data, dev_flat2.data, dev_result.data,
            width, height)

        return dev_result.get()
示例#24
0
    def test_descriptor(self):
        '''
        #tests keypoints descriptors creation kernel
        '''
        
        #descriptor_setup :
        keypoints_o, nb_keypoints, actual_nb_keypoints, grad, ori = descriptor_setup()
        #keypoints should be a compacted vector of keypoints
        keypoints_start, keypoints_end = 0, 80 #actual_nb_keypoints
        #keypoints_start, keypoints_end = 20, 30
        keypoints = keypoints_o[keypoints_start:keypoints_end]
        print("Working on keypoints : [%s,%s]"%(keypoints_start,keypoints_end))
        wg = max(self.wg),
        shape = calc_size((keypoints_o.shape[0],), wg)
        gpu_keypoints = pyopencl.array.to_device(queue,keypoints_o)
        gpu_descriptors = pyopencl.array.empty(queue, (keypoints_end-keypoints_start+1,128), dtype=numpy.uint8, order="C")
        gpu_grad = pyopencl.array.to_device(queue, grad)
        gpu_ori = pyopencl.array.to_device(queue, ori)
        
        local_size = (keypoints_end-keypoints_start+1)*128*4
        local_mem = pyopencl.LocalMemory(local_size)
        
        keypoints_start, keypoints_end = numpy.int32(keypoints_start), numpy.int32(keypoints_end)
        grad_height, grad_width = numpy.int32(grad.shape)

        t0 = time.time()
        k1 = self.program.descriptor(queue, shape, wg, 
            gpu_keypoints.data, gpu_descriptors.data, local_mem, gpu_grad.data, gpu_ori.data,
            keypoints_start, keypoints_end, grad_width, grad_height)    	
        res = gpu_descriptors.get()
        t1 = time.time()
        
        ref = my_descriptor(keypoints_o, grad, ori, keypoints_start, keypoints_end)
        
        #print res[0:30,0:15]
        print ""
        #print ref[0:30,0:15]
        print res[0:keypoints_end-keypoints_start,0:15]-ref[0:keypoints_end-keypoints_start,0:15]
        
        t2 = time.time()
        
        #print keypoints_before_orientation[0:33]
        #if (PRINT_KEYPOINTS):
        
         
#        TODO
#        #sort to compare added keypoints
#        d1,d2,d3,d4 = keypoints_compare(ref,res)
#        self.assert_(d1 < 1e-4, "delta_cols=%s" % (d1))
#        self.assert_(d2 < 1e-4, "delta_rows=%s" % (d2))
#        self.assert_(d3 < 1e-4, "delta_sigma=%s" % (d3))
#        self.assert_(d4 < 1e-4, "delta_angle=%s" % (d4))
#        logger.info("delta_cols=%s" % d1)
#        logger.info("delta_rows=%s" % d2)
#        logger.info("delta_sigma=%s" % d3)
#        logger.info("delta_angle=%s" % d4)
        
        
        if PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms." % (1000.0 * (t2 - t1), 1000.0 * (t1 - t0)))
            logger.info("Descriptors computation took %.3fms" % (1e-6 * (k1.profile.end - k1.profile.start)))
示例#25
0
    def test_local_maxmin(self):
        """
        Tests the local maximum/minimum detection kernel.

        The keypoints found by the kernel in the DOGs returned by
        local_maxmin_setup() are compared, column by column, with the ones
        returned by the Python reference my_local_maxmin(). The kernel output
        and the keypoint counter are kept on the instance for further use.
        """
        #local_maxmin_setup :
        border_dist, peakthresh, EdgeThresh, EdgeThresh0, octsize, s, nb_keypoints, width, height, DOGS, g = local_maxmin_setup(
        )
        self.s = numpy.int32(s)  #1, 2, 3 ... not 4 nor 0.
        self.gpu_dogs = pyopencl.array.to_device(queue, DOGS)
        self.output = pyopencl.array.empty(queue, (nb_keypoints, 4),
                                           dtype=numpy.float32,
                                           order="C")
        self.output.fill(-1.0, queue)  #memset for invalid keypoints
        self.counter = pyopencl.array.zeros(queue, (1, ),
                                            dtype=numpy.int32,
                                            order="C")
        nb_keypoints = numpy.int32(nb_keypoints)
        self.shape = calc_size((DOGS.shape[1], DOGS.shape[0] * DOGS.shape[2]),
                               self.wg)  #it's a 3D vector !!

        t0 = time.time()
        k1 = self.program.local_maxmin(queue, self.shape, self.wg,
                                       self.gpu_dogs.data, self.output.data,
                                       border_dist, peakthresh, octsize,
                                       EdgeThresh0, EdgeThresh,
                                       self.counter.data, nb_keypoints, self.s,
                                       width, height)

        res = self.output.get()
        self.keypoints1 = self.output  #for further use
        self.actual_nb_keypoints = self.counter.get()[0]  #for further use

        t1 = time.time()
        ref, actual_nb_keypoints2 = my_local_maxmin(DOGS, peakthresh,
                                                    border_dist, octsize,
                                                    EdgeThresh0, EdgeThresh,
                                                    nb_keypoints, self.s,
                                                    width, height)
        t2 = time.time()

        #the arrays have to be sorted, because the order of the peaks is unknown on the GPU;
        #each column is sorted independently of the others
        res_peaks = res[(res[:, 0].argsort(axis=0)), 0]
        ref_peaks = ref[(ref[:, 0].argsort(axis=0)), 0]
        res_r = res[(res[:, 1].argsort(axis=0)), 1]
        ref_r = ref[(ref[:, 1].argsort(axis=0)), 1]
        res_c = res[(res[:, 2].argsort(axis=0)), 2]
        ref_c = ref[(ref[:, 2].argsort(axis=0)), 2]
        #res_s = res[(res[:,3].argsort(axis=0)),3]
        #ref_s = ref[(ref[:,3].argsort(axis=0)),3]
        delta_peaks = abs(ref_peaks - res_peaks).max()
        delta_r = abs(ref_r - res_r).max()
        delta_c = abs(ref_c - res_c).max()

        if PRINT_KEYPOINTS:
            print(
                "keypoints after 2 steps of refinement: (s= %s, octsize=%s) %s"
                % (self.s, octsize, self.actual_nb_keypoints))
            #print("For ref: %s" %(ref_peaks[ref_peaks!=-1].shape))
            print(res[0:self.actual_nb_keypoints])  #[0:74]
            #print ref[0:32]

        #log before asserting so that the values are recorded even on failure
        logger.info("delta_peaks=%s" % delta_peaks)
        logger.info("delta_r=%s" % delta_r)
        logger.info("delta_c=%s" % delta_c)
        self.assertTrue(delta_peaks < 1e-4, "delta_peaks=%s" % (delta_peaks))
        self.assertTrue(delta_r < 1e-4, "delta_r=%s" % (delta_r))
        self.assertTrue(delta_c < 1e-4, "delta_c=%s" % (delta_c))

        if PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms." %
                        (1000.0 * (t2 - t1), 1000.0 * (t1 - t0)))
            logger.info("Local extrema search took %.3fms" %
                        (1e-6 * (k1.profile.end - k1.profile.start)))
示例#26
0
    def test_transform(self):
        '''
        Tests the transform kernel.

        A pair of images is built (test image and a deformed copy when USE_LENA
        is set, two frames read from disk otherwise). The affine correction is
        estimated by self.matching_correction() and applied to the second image
        both by the OpenCL kernel and by scipy's affine_transform. The maximum
        difference between the corrected image and the first image is printed
        and the images are displayed; no assertion is made.
        '''

        if (USE_LENA):
            #original image
            image = scipy.misc.lena().astype(numpy.float32)
            image = numpy.ascontiguousarray(image[0:512, 0:512])
            image_height, image_width = image.shape
            #transformation
            angle = 1.9  #numpy.pi/5.0 ; only used by the commented-out rotation matrix
            #        matrix = numpy.array([[numpy.cos(angle),-numpy.sin(angle)],[numpy.sin(angle),numpy.cos(angle)]],dtype=numpy.float32)
            #        offset_value = numpy.array([1000.0, 100.0],dtype=numpy.float32)
            #        matrix = numpy.array([[0.9,0.2],[-0.4,0.9]],dtype=numpy.float32)
            #        offset_value = numpy.array([-20.0,256.0],dtype=numpy.float32)
            matrix = numpy.array([[1.0, -0.75], [0.7, 0.5]],
                                 dtype=numpy.float32)

            offset_value = numpy.array([250.0, -150.0], dtype=numpy.float32)

            image2 = scipy.ndimage.interpolation.affine_transform(
                image, matrix, offset=offset_value, order=1, mode="constant")
            #the size of the deformed image is needed below when IMAGE_RESHAPE is set
            image2_height, image2_width = image2.shape

        else:  #use images of a stack
            image = scipy.misc.imread("/home/paleo/Titanium/test/frame0.png")
            image2 = scipy.misc.imread("/home/paleo/Titanium/test/frame1.png")
            offset_value = numpy.array([0.0, 0.0], dtype=numpy.float32)
            image_height, image_width = image.shape
            image2_height, image2_width = image2.shape

        fill_value = numpy.float32(0.0)
        mode = numpy.int32(1)  #NOTE(review): meaning of this value is defined by the kernel -- confirm

        if IMAGE_RESHAPE:  #turns out that image should always be reshaped
            output_height, output_width = int(3000), int(3000)
            image, image_height, image_width = self.image_reshape(
                image, output_height, output_width, image_height, image_width)
            image2, image2_height, image2_width = self.image_reshape(
                image2, output_height, output_width, image2_height,
                image2_width)

        else:
            output_height, output_width = int(
                image_height * numpy.sqrt(2)), int(image_width * numpy.sqrt(2))
        print("Image : (%s, %s) -- Output: (%s, %s)" % (
            image_height, image_width, output_height, output_width))

        #perform correction by least square
        sol, MSE = self.matching_correction(image, image2)
        print(sol)

        #sol holds the 2x2 matrix in rows 0, 1, 3, 4 and the offsets in rows 2 and 5
        correction_matrix = numpy.zeros((2, 2), dtype=numpy.float32)
        correction_matrix[0] = sol[0:2, 0]
        correction_matrix[1] = sol[3:5, 0]
        matrix_for_gpu = correction_matrix.reshape(4, 1)  #for float4 struct
        offset_value[0] = sol[2, 0]
        offset_value[1] = sol[5, 0]

        wg = 8, 8
        shape = calc_size((output_width, output_height), wg)
        gpu_image = pyopencl.array.to_device(queue, image2)
        gpu_output = pyopencl.array.empty(queue, (output_height, output_width),
                                          dtype=numpy.float32,
                                          order="C")
        gpu_matrix = pyopencl.array.to_device(queue, matrix_for_gpu)
        gpu_offset = pyopencl.array.to_device(queue, offset_value)
        image_height, image_width = numpy.int32((image_height, image_width))
        output_height, output_width = numpy.int32(
            (output_height, output_width))

        t0 = time.time()
        k1 = self.program.transform(queue, shape, wg, gpu_image.data,
                                    gpu_output.data, gpu_matrix.data,
                                    gpu_offset.data, image_width, image_height,
                                    output_width, output_height, fill_value,
                                    mode)
        res = gpu_output.get()
        t1 = time.time()
        #        print res[0,0]

        ref = scipy.ndimage.interpolation.affine_transform(
            image2,
            correction_matrix,
            offset=offset_value,
            output_shape=(output_height, output_width),
            order=1,
            mode="constant",
            cval=fill_value)
        t2 = time.time()

        #NOTE(review): requires res and image to have compatible shapes
        delta = abs(res - image)
        delta_arg = delta.argmax()
        delta_max = delta.max()
        #        delta_mse_res = ((res-image)**2).sum()/image.size
        #        delta_mse_ref = ((ref-image)**2).sum()/image.size
        #row and column of the flat index; integer division is required for the row
        at_0, at_1 = divmod(int(delta_arg), int(output_width))
        print("Max error: %f at (%d, %d)" % (delta_max, at_0, at_1))
        #        print("Mean Squared Error Res/Original : %f" %(delta_mse_res))
        #        print("Mean Squared Error Ref/Original: %f" %(delta_mse_ref))
        print("minimal MSE according to least squares : %f" % MSE)
        #        print res[at_0,at_1]
        #        print ref[at_0,at_1]

        SHOW_FIGURES = True
        if SHOW_FIGURES:
            fig = pylab.figure()
            sp1 = fig.add_subplot(221, title="Input image")
            sp1.imshow(image, interpolation="nearest")
            sp2 = fig.add_subplot(222, title="Image after deformation")
            sp2.imshow(image2, interpolation="nearest")
            sp2 = fig.add_subplot(223, title="Corrected image (OpenCL)")
            sp2.imshow(res, interpolation="nearest")
            sp2 = fig.add_subplot(224, title="Corrected image (Scipy)")
            sp2.imshow(ref, interpolation="nearest")
            #            sp2.imshow(ref, interpolation="nearest")
            #            sp3 = fig.add_subplot(223,title="delta (max = %f)" %delta_max)
            #            sh3 = sp3.imshow(delta[:,:], interpolation="nearest")
            #            cbar = fig.colorbar(sh3)
            fig.show()
            raw_input("enter")

        if PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms." %
                        (1000.0 * (t2 - t1), 1000.0 * (t1 - t0)))
            logger.info("Transformation took %.3fms" %
                        (1e-6 * (k1.profile.end - k1.profile.start)))
示例#27
0
    def test_transform(self):
        '''
        tests transform kernel
        '''    



        if (USE_LENA):
            #original image
            image = scipy.misc.lena().astype(numpy.float32)
            image = numpy.ascontiguousarray(image[0:512,0:512])
            image_height, image_width = image.shape
            #transformation
            angle = 1.9 #numpy.pi/5.0
    #        matrix = numpy.array([[numpy.cos(angle),-numpy.sin(angle)],[numpy.sin(angle),numpy.cos(angle)]],dtype=numpy.float32)
    #        offset_value = numpy.array([1000.0, 100.0],dtype=numpy.float32)
    #        matrix = numpy.array([[0.9,0.2],[-0.4,0.9]],dtype=numpy.float32)
    #        offset_value = numpy.array([-20.0,256.0],dtype=numpy.float32)
            matrix = numpy.array([[1.0,-0.75],[0.7,0.5]],dtype=numpy.float32)
            
            offset_value = numpy.array([250.0, -150.0],dtype=numpy.float32)
           
            image2 = scipy.ndimage.interpolation.affine_transform(image,matrix,offset=offset_value,order=1, mode="constant")
        
        else: #use images of a stack
            image = scipy.misc.imread("/home/paleo/Titanium/test/frame0.png")
            image2 = scipy.misc.imread("/home/paleo/Titanium/test/frame1.png")
            offset_value = numpy.array([0.0, 0.0],dtype=numpy.float32)
            image_height, image_width = image.shape
            image2_height, image2_width = image2.shape
            
        fill_value = numpy.float32(0.0)
        mode = numpy.int32(1)   
            
        if IMAGE_RESHAPE: #turns out that image should always be reshaped
            output_height, output_width = int(3000), int(3000)
            image, image_height, image_width = self.image_reshape(image,output_height,output_width,image_height,image_width)
            image2, image2_height, image2_width = self.image_reshape(image2,output_height,output_width,image2_height,image2_width) 
            
            
       
        else: output_height, output_width = int(image_height*numpy.sqrt(2)),int(image_width*numpy.sqrt(2))
        print "Image : (%s, %s) -- Output: (%s, %s)" %(image_height, image_width , output_height, output_width)
        
        
        
            
        
        
        
        
        
        
        
        #perform correction by least square
        sol, MSE = self.matching_correction(image,image2)
        print sol
        
        
        correction_matrix = numpy.zeros((2,2),dtype=numpy.float32)
        correction_matrix[0] = sol[0:2,0]
        correction_matrix[1] = sol[3:5,0]
        matrix_for_gpu = correction_matrix.reshape(4,1) #for float4 struct
        offset_value[0] = sol[2,0]
        offset_value[1] = sol[5,0]
        
        wg = 8,8
        shape = calc_size((output_width,output_height), wg)
        gpu_image = pyopencl.array.to_device(queue, image2)
        gpu_output = pyopencl.array.empty(queue, (output_height, output_width), dtype=numpy.float32, order="C")
        gpu_matrix = pyopencl.array.to_device(queue,matrix_for_gpu)
        gpu_offset = pyopencl.array.to_device(queue,offset_value)
        image_height, image_width = numpy.int32((image_height, image_width))
        output_height, output_width = numpy.int32((output_height, output_width))
        
        t0 = time.time()
        k1 = self.program.transform(queue, shape, wg,
                gpu_image.data, gpu_output.data, gpu_matrix.data, gpu_offset.data, 
                image_width, image_height, output_width, output_height, fill_value, mode)
        res = gpu_output.get()
        t1 = time.time()
#        print res[0,0]
        
        ref = scipy.ndimage.interpolation.affine_transform(image2,correction_matrix,
            offset=offset_value, output_shape=(output_height,output_width),order=1, mode="constant", cval=fill_value)
        t2 = time.time()
        
        delta = abs(res-image)
        delta_arg = delta.argmax()
        delta_max = delta.max()
#        delta_mse_res = ((res-image)**2).sum()/image.size
#        delta_mse_ref = ((ref-image)**2).sum()/image.size
        at_0, at_1 = delta_arg/output_width, delta_arg%output_width
        print("Max error: %f at (%d, %d)" %(delta_max, at_0, at_1))
#        print("Mean Squared Error Res/Original : %f" %(delta_mse_res))
#        print("Mean Squared Error Ref/Original: %f" %(delta_mse_ref))
        print("minimal MSE according to least squares : %f" %MSE)
#        print res[at_0,at_1]
#        print ref[at_0,at_1]
        
        SHOW_FIGURES = True
        if SHOW_FIGURES:
            fig = pylab.figure()
            sp1 = fig.add_subplot(221,title="Input image")
            sp1.imshow(image, interpolation="nearest")
            sp2 = fig.add_subplot(222,title="Image after deformation")
            sp2.imshow(image2, interpolation="nearest")
            sp2 = fig.add_subplot(223,title="Corrected image (OpenCL)")
            sp2.imshow(res, interpolation="nearest")
            sp2 = fig.add_subplot(224,title="Corrected image (Scipy)")
            sp2.imshow(ref, interpolation="nearest")
#            sp2.imshow(ref, interpolation="nearest")
#            sp3 = fig.add_subplot(223,title="delta (max = %f)" %delta_max)
#            sh3 = sp3.imshow(delta[:,:], interpolation="nearest")
#            cbar = fig.colorbar(sh3)
            fig.show()
            raw_input("enter")


        if PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms." % (1000.0 * (t2 - t1), 1000.0 * (t1 - t0)))
            logger.info("Transformation took %.3fms" % (1e-6 * (k1.profile.end - k1.profile.start)))