示例#1
0
    def bench_cpu1d_lut(self):
        self.update_mp()
        print("Working on processor: %s" % self.get_cpu())
        label = "1D_CPU_parallel_OpenMP"
        results = {}
        self.new_curve(results, label)
        for param in ds_list:
            self.update_mp()
            ref = self.get_ref(param)
            fn = datasets[param]
            ai = pyFAI.load(param)
            data = fabio.open(fn).data
            size = data.size
            N = min(data.shape)
            print("1D integration of %s %.1f Mpixel -> %i bins" %
                  (op.basename(fn), size / 1e6, N))
            t0 = time.time()
            res = ai.xrpd_LUT(data, N)
            t1 = time.time()
            self.print_init(t1 - t0)
            print "lut.shape=", ai._lut_integrator.lut.shape, "lut.nbytes (MB)", ai._lut_integrator.size * 8 / 1e6
            self.update_mp()
            del ai, data
            self.update_mp()
            setup = """
import pyFAI,fabio
ai=pyFAI.load(r"%s")
data = fabio.open(r"%s").data
N=min(data.shape)
out=ai.xrpd_LUT(data,N)""" % (param, fn)
            t = timeit.Timer("ai.xrpd_LUT(data,N,safe=False)", setup)
            tmin = min([
                i / self.nbr
                for i in t.repeat(repeat=self.repeat, number=self.nbr)
            ])
            self.print_exec(tmin)
            R = utilstest.Rwp(res, ref)
            print(
                "%sResults are bad with R=%.3f%s" %
                (self.WARNING, R, self.ENDC)
                if R > self.LIMIT else "%sResults are good with R=%.3f%s" %
                (self.OKGREEN, R, self.ENDC))
            self.update_mp()
            if R < self.LIMIT:
                size /= 1e6
                tmin *= 1000.0
                results[size] = tmin
                self.new_point(size, tmin)
            self.update_mp()
        self.print_sep()
        self.meth.append(label)
        self.results[label] = results
        self.update_mp()
示例#2
0
    def bench_gpu1d(self,
                    devicetype="gpu",
                    useFp64=True,
                    platformid=None,
                    deviceid=None):
        """Benchmark the ``xrpd_OpenCL`` 1D integration.

        Returns early when ``ocl`` is None or ``ocl.select_device`` finds no
        device of the requested type.  For each geometry file of ``ds_list``
        the first OpenCL call is timed, its result is compared with
        ``ai.xrpd`` on the same data, and the call is timed again through
        ``timeit``.  Datasets whose R-factor is below ``self.LIMIT`` are
        recorded (time in ms against size in Mpixel) in
        ``self.results[label]``; a dataset whose first call raises is skipped.
        """
        self.update_mp()
        precision = "64 bits mode" if useFp64 else "32 bits mode"
        print("Working on %s, in " % devicetype + precision +
              "(%s.%s)" % (platformid, deviceid))
        if ocl is None or not ocl.select_device(devicetype):
            print("No pyopencl or no such device: skipping benchmark")
            return
        results = {}
        bits = "64" if useFp64 else "32"
        label = "Forward_OpenCL_%s_%s_bits" % (devicetype, bits)
        curve_pending = True
        for param in ds_list:
            self.update_mp()
            fn = datasets[param]
            ai = pyFAI.load(param)
            data = fabio.open(fn).data
            npix = data.size
            N = min(data.shape)
            print("1D integration of %s %.1f Mpixel -> %i bins (%s)" %
                  (op.basename(fn), npix / 1e6, N, precision))

            started = time.time()
            try:
                obtained = ai.xrpd_OpenCL(data,
                                          N,
                                          devicetype=devicetype,
                                          useFp64=useFp64,
                                          platformid=platformid,
                                          deviceid=deviceid)
            except Exception as error:
                print("Failed to find an OpenCL GPU (useFp64:%s) %s" %
                      (useFp64, error))
                continue
            elapsed = time.time() - started
            self.print_init(elapsed)
            self.update_mp()
            expected = ai.xrpd(data, N)
            R = utilstest.Rwp(obtained, expected)
            if R > self.LIMIT:
                print("%sResults are bad with R=%.3f%s" %
                      (self.WARNING, R, self.ENDC))
            else:
                print("%sResults are good with R=%.3f%s" %
                      (self.OKGREEN, R, self.ENDC))
            # Source of the timeit setup; the leading empty entry yields the
            # initial newline of the script.
            setup_template = "\n".join([
                "",
                "import pyFAI,fabio",
                'ai=pyFAI.load(r"%s")',
                'data = fabio.open(r"%s").data',
                "N=min(data.shape)",
                'out=ai.xrpd_OpenCL(data,N, devicetype=r"%s", useFp64=%s, '
                'platformid=%s, deviceid=%s)',
            ])
            setup = setup_template % (param, fn, devicetype, useFp64,
                                      platformid, deviceid)
            timer = timeit.Timer("ai.xrpd_OpenCL(data,N,safe=False)", setup)
            tmin = min(
                total / self.nbr
                for total in timer.repeat(repeat=self.repeat, number=self.nbr))
            del timer
            self.update_mp()
            self.print_exec(tmin)
            print("")
            if R < self.LIMIT:
                mpix = npix / 1e6
                tmin_ms = tmin * 1000.0
                results[mpix] = tmin_ms
                if curve_pending:
                    # The curve is only created once a first valid point exists
                    self.new_curve(results, label)
                    curve_pending = False
                else:
                    self.new_point(mpix, tmin_ms)
                self.update_mp()
        self.print_sep()
        self.meth.append(label)
        self.results[label] = results
        self.update_mp()
示例#3
0
    def bench_cpu1d_csr_ocl(self,
                            devicetype="GPU",
                            platformid=None,
                            deviceid=None,
                            padded=False,
                            block_size=32):
        """Benchmark the ``xrpd_CSR_OCL`` 1D integration.

        When either ``platformid`` or ``deviceid`` is None, both are taken
        from ``ocl.select_device(devicetype)``.  For each geometry file of
        ``ds_list`` the first call is timed, then the call is timed again
        through ``timeit``; the first result is compared with
        ``self.get_ref(param)``.  Datasets whose R-factor is below
        ``self.LIMIT`` are recorded (time in ms against size in Mpixel) in
        ``self.results[label]``.  A ``MemoryError`` during the first call
        stops the loop over the datasets.
        """
        self.update_mp()
        if ocl is None:
            print("No pyopencl")
            return
        if platformid is None or deviceid is None:
            selected = ocl.select_device(devicetype)
            if not selected:
                print("No such OpenCL device: skipping benchmark")
                return
            platformid, deviceid = selected
        platform = ocl.platforms[platformid]
        print(
            "Working on device: %s platform: %s device: %s padding: %s block_size= %s"
            % (devicetype, platform, platform.devices[deviceid], padded,
               block_size))
        label = "1D_%s_parallel_OpenCL, padded=%s, block_size=%s" % (
            devicetype, padded, block_size)
        results = {}
        curve_pending = True
        for param in ds_list:
            self.update_mp()
            expected = self.get_ref(param)
            fn = datasets[param]
            ai = pyFAI.load(param)
            data = fabio.open(fn).data
            npix = data.size
            N = min(data.shape)
            print("1D integration of %s %.1f Mpixel -> %i bins" %
                  (op.basename(fn), npix / 1e6, N))
            started = time.time()
            try:
                obtained = ai.xrpd_CSR_OCL(data,
                                           N,
                                           devicetype=devicetype,
                                           platformid=platformid,
                                           deviceid=deviceid,
                                           padded=padded,
                                           block_size=block_size)
            except MemoryError as error:
                print(error)
                break
            self.print_init(time.time() - started)
            self.update_mp()
            ai.reset()
            # The timed runs below rebuild their own integrator and image.
            del ai, data
            self.update_mp()
            # Source of the timeit setup; the leading empty entry yields the
            # initial newline of the script.
            setup_template = "\n".join([
                "",
                "import pyFAI,fabio",
                'ai=pyFAI.load(r"%s")',
                'data = fabio.open(r"%s").data',
                "N=min(data.shape)",
                'out=ai.xrpd_CSR_OCL(data,N,devicetype=r"%s",platformid=%s,'
                'deviceid=%s,padded=%s,block_size=%s)',
            ])
            setup = setup_template % (param, fn, devicetype, platformid,
                                      deviceid, padded, block_size)
            statement = (
                "ai.xrpd_CSR_OCL(data,N,safe=False,padded=%s,block_size=%s)" %
                (padded, block_size))
            timer = timeit.Timer(statement, setup)
            tmin = min(
                total / self.nbr
                for total in timer.repeat(repeat=self.repeat, number=self.nbr))
            self.update_mp()
            del timer
            self.update_mp()
            self.print_exec(tmin)
            R = utilstest.Rwp(obtained, expected)
            if R > self.LIMIT:
                print("%sResults are bad with R=%.3f%s" %
                      (self.WARNING, R, self.ENDC))
            else:
                print("%sResults are good with R=%.3f%s" %
                      (self.OKGREEN, R, self.ENDC))
            if R < self.LIMIT:
                mpix = npix / 1e6
                tmin_ms = tmin * 1000.0
                results[mpix] = tmin_ms
                if curve_pending:
                    # The curve is only created once a first valid point exists
                    self.new_curve(results, label)
                    curve_pending = False
                else:
                    self.new_point(mpix, tmin_ms)
            self.update_mp()
        self.print_sep()
        self.meth.append(label)
        self.results[label] = results
        self.update_mp()
示例#4
0
    def bench_1d_ocl_csr(self, check=False, opencl=None):
        """
        Benchmark the "ocl_csr" 1D integration of Pilatus1M.poni for a range
        of block sizes, using the profiling events of the OpenCL integrator.

        For every block size the best mean "integrate" event duration is
        stored in self.results[label]; derived figures are stored in
        self.flops[label] and self.mem_band[label].

        @param check: check results vs ref; only block sizes whose R-factor
                      is below self.LIMIT are then recorded and plotted
        @param opencl: dict containing platformid, deviceid and devicetype
        """
        method = "ocl_csr"
        self.update_mp()
        if opencl:
            if ocl is None:
                print("No pyopencl")
                return
            platformid = opencl.get("platformid")
            deviceid = opencl.get("deviceid")
            if (platformid is None) or (deviceid is None):
                platdev = ocl.select_device(opencl.get("devicetype"))
                if not platdev:
                    print("No such OpenCL device: skipping benchmark")
                    return
                platformid, deviceid = platdev
                opencl["platformid"], opencl["deviceid"] = platformid, deviceid
            # From here on both ids are bound, whether they were supplied by
            # the caller or selected just above.
            devicetype = opencl["devicetype"] = ocl.platforms[
                platformid].devices[deviceid].type
            print("Working on device: %s platform: %s device: %s" %
                  (devicetype, ocl.platforms[platformid],
                   ocl.platforms[platformid].devices[deviceid]))
            label = "1D_" + method + "_" + devicetype
            method += "_%i,%i" % (opencl["platformid"], opencl["deviceid"])
        else:
            print("Working on processor: %s" % self.get_cpu())
            label = "1D_" + self.LABELS[method]
        results = {}
        flops = {}
        mem_band = {}
        first = True
        param = "Pilatus1M.poni"
        block_size_list = [1, 2, 4, 8, 16, 32, 64, 128, 256]
        for block_size in block_size_list:
            self.update_mp()
            fn = datasets[param]
            setup = self.setup_1d % (param, fn)
            stmt = self.stmt_1d % (method, block_size)
            # Run the setup snippet in an explicit namespace: this form of
            # exec is valid on Python 2 and 3 and does not rely on exec
            # creating function locals.
            # NOTE(review): assumes setup_1d/stmt_1d/unsetup only use names
            # they define themselves or module globals -- confirm.
            namespace = dict(globals())
            exec(setup, namespace)
            ai = namespace["ai"]
            data = namespace["data"]
            N = namespace["N"]
            size = data.size / 1.0e6
            print("1D integration of %s %.1f Mpixel -> %i bins" %
                  (op.basename(fn), size, N))
            t0 = time.time()
            res = eval(stmt, namespace)
            self.print_init(time.time() - t0)
            self.update_mp()
            if check:
                if "csr" in method:
                    print("csr: size= %s \t nbytes %.3f MB " %
                          (ai._csr_integrator.data.size,
                           ai._csr_integrator.lut_nbytes / 2**20))

            bins = ai._csr_integrator.bins
            nnz = ai._csr_integrator.nnz
            parallel_reduction = sum(
                [2**i for i in range(1, int(log2(block_size)))])

            FLOPs = 9 * nnz + 11 * parallel_reduction + 1 * bins
            mem_access = (2 * block_size * bins + 5 * nnz + 7 * bins) * 4

            # Release the integrator and the image; the timed runs below
            # rebuild their own.
            del ai, data
            namespace.clear()
            self.update_mp()

            t_repeat = []
            for j in range(self.repeat):
                namespace = dict(globals())
                exec(setup, namespace)
                for i in range(self.nbr):
                    eval(stmt, namespace)
                # Keep only the profiling events of the integration itself.
                t = [
                    1e-6 * (e[1].profile.end - e[1].profile.start)
                    for e in namespace["ai"]._ocl_csr_integr.events
                    if "integrate" in e[0]
                ]
                exec(self.unsetup, namespace)
                t_repeat.append(numpy.mean(t))

            tmin = min(t_repeat)
            self.update_mp()
            self.print_exec(tmin)
            recorded = True
            if check:
                ref = self.get_ref(param)
                R = utilstest.Rwp(res, ref)
                print(
                    "%sResults are bad with R=%.3f%s" %
                    (self.WARNING, R, self.ENDC)
                    if R > self.LIMIT else "%sResults are good with R=%.3f%s" %
                    (self.OKGREEN, R, self.ENDC))
                self.update_mp()
                recorded = R < self.LIMIT
                if recorded:
                    results[block_size] = tmin
                    flops[block_size] = (FLOPs / tmin) * 1e-6
                    mem_band[block_size] = (mem_access / tmin) * 1e-6
                    self.update_mp()
            else:
                # NOTE(review): unlike the checked branch, no 1e-6 scaling is
                # applied here, so units differ between the two modes --
                # confirm which one is intended.
                results[block_size] = tmin
                flops[block_size] = FLOPs / tmin
                mem_band[block_size] = mem_access / tmin
            # Only plot what was actually recorded, so the curve and the
            # results dictionary always agree.
            if recorded:
                if first:
                    self.new_curve(results, label)
                    first = False
                else:
                    self.new_point(block_size, tmin)
        self.print_sep()
        self.meth.append(label)
        self.results[label] = results
        self.flops[label] = flops
        self.mem_band[label] = mem_band
        self.update_mp()
示例#5
0
 def bench_1d(self, method="splitBBox", check=False, opencl=None):
     """
     Benchmark one 1D integration method over every dataset of ds_list.

     The first call is timed on its own, then the call is timed again with
     timeit; the best time (in ms) is stored against the image size (in
     Mpixel) in self.results[label].  A memory or runtime error stops the
     loop over the datasets.

     @param method: method to be benchmarked
     @param check: check results vs ref; only datasets whose R-factor is
                   below self.LIMIT are then recorded
     @param opencl: dict containing platformid, deviceid and devicetype
     """
     self.update_mp()
     if opencl:
         if ocl is None:
             print("No pyopencl")
             return
         if (opencl.get("platformid") is None) or (opencl.get("deviceid") is
                                                   None):
             platdev = ocl.select_device(opencl.get("devicetype"))
             if not platdev:
                 print("No such OpenCL device: skipping benchmark")
                 return
             platformid, deviceid = opencl["platformid"], opencl[
                 "deviceid"] = platdev
         else:
             platformid, deviceid = opencl["platformid"], opencl["deviceid"]
         devicetype = opencl["devicetype"] = ocl.platforms[
             platformid].devices[deviceid].type
         print("Working on device: %s platform: %s device: %s" %
               (devicetype, ocl.platforms[platformid],
                ocl.platforms[platformid].devices[deviceid]))
         label = "1D_" + (self.LABELS[method] % devicetype)
         method += "_%i,%i" % (opencl["platformid"], opencl["deviceid"])
         memory_error = (pyFAI.opencl.pyopencl.MemoryError, MemoryError,
                         pyFAI.opencl.pyopencl.RuntimeError, RuntimeError)
     else:
         print("Working on processor: %s" % self.get_cpu())
         label = "1D_" + self.LABELS[method]
         memory_error = (MemoryError, RuntimeError)
     results = {}
     first = True
     for param in ds_list:
         self.update_mp()
         fn = datasets[param]
         setup = self.setup_1d % (param, fn)
         stmt = self.stmt_1d % method
         # Run the setup snippet in an explicit namespace: this form of exec
         # is valid on Python 2 and 3 and does not rely on exec creating
         # function locals.
         # NOTE(review): assumes setup_1d/stmt_1d only use names they define
         # themselves or module globals -- confirm.
         namespace = dict(globals())
         exec(setup, namespace)
         ai = namespace["ai"]
         data = namespace["data"]
         N = namespace["N"]
         size = data.size / 1.0e6
         print("1D integration of %s %.1f Mpixel -> %i bins" %
               (op.basename(fn), size, N))
         try:
             t0 = time.time()
             res = eval(stmt, namespace)
             self.print_init(time.time() - t0)
         except memory_error as error:
             print(error)
             break
         self.update_mp()
         if check:
             if "lut" in method:
                 print("lut: shape= %s \t nbytes %.3f MB " %
                       (ai._lut_integrator.lut.shape,
                        ai._lut_integrator.lut_nbytes / 2**20))
             elif "csr" in method:
                 print("csr: size= %s \t nbytes %.3f MB " %
                       (ai._csr_integrator.data.size,
                        ai._csr_integrator.lut_nbytes / 2**20))
         # Release the integrator and the image; timeit rebuilds its own.
         del ai, data
         namespace.clear()
         self.update_mp()
         try:
             t = timeit.Timer(stmt, setup + stmt)
             # Best per-call time (seconds) over all repetitions.
             tmin = min([
                 i / self.nbr
                 for i in t.repeat(repeat=self.repeat, number=self.nbr)
             ])
         except memory_error as error:
             print(error)
             break
         self.update_mp()
         self.print_exec(tmin)
         tmin *= 1000.0  # seconds -> milliseconds
         if check:
             ref = self.get_ref(param)
             R = utilstest.Rwp(res, ref)
             print(
                 "%sResults are bad with R=%.3f%s" %
                 (self.WARNING, R, self.ENDC)
                 if R > self.LIMIT else "%sResults are good with R=%.3f%s" %
                 (self.OKGREEN, R, self.ENDC))
             self.update_mp()
             if R < self.LIMIT:
                 results[size] = tmin
                 self.update_mp()
                 if first:
                     if opencl:
                         self.new_curve(results, label, style="--")
                     else:
                         self.new_curve(results, label, style="-")
                     first = False
                 else:
                     self.new_point(size, tmin)
         else:
             results[size] = tmin
             if first:
                 self.new_curve(results, label)
                 first = False
             else:
                 self.new_point(size, tmin)
     self.print_sep()
     self.meth.append(label)
     self.results[label] = results
     self.update_mp()
示例#6
0
# Compare the LUT integrator held by `ai` with an OCL_LUT_Integrator built
# from the same look-up table, with and without dummy/dark/flat corrections.
# Every print uses one pre-formatted argument so the output is the same
# whether print is a statement (Python 2) or a function (Python 3).
ai = pyFAI.load("testimages/Pilatus1M.poni")
data = fabio.open("testimages/Pilatus1M.edf").data
ref = ai.xrpd_LUT(data, 1000)
obt = ai.xrpd_LUT_OCL(data, 1000)
print(abs(obt[1] - ref[1]).max())
lut = ai._lut_integrator.lut
gpu = pyFAI.ocl_azim_lut.OCL_LUT_Integrator(lut, data.size, "GPU")
print(gpu.device)
img = numpy.zeros(data.shape, dtype="float32")
print("ref %s %s" % ((data == -2).sum(), (data == -1).sum()))
# Read the device-side "image" buffer back into `img` for comparison
pyopencl.enqueue_copy(gpu._queue, img, gpu._cl_mem["image"])  #.wait()
print("obt %s %s" % ((img == -2).sum(), (img == -1).sum()))

# No correction
out_cyt = ai._lut_integrator.integrate(data)
out_ocl = gpu.integrate(data)[0]
print("NoCorr R= %s" % (utilstest.Rwp(
    (out_cyt[0], out_ocl), out_cyt[:2], "no corrections"), ))
nodummy = out_cyt[1]
plot(nodummy + 1, label="no_corr")

# Dummy value handling
out_cyt = ai._lut_integrator.integrate(data, dummy=-2, delta_dummy=1.5)
out_ocl = gpu.integrate(data, dummy=-2, delta_dummy=1.5)[0]
print("Dummy  R= %s" % (utilstest.Rwp(
    (out_cyt[0], out_ocl), out_cyt[:2], "Dummy"), ))
#print "nodummy/Dummy", utilstest.Rwp((out_cyt[0], out_cyt[1]), (out_cyt[0], nodummy), "nodummy/Dummy")

# Dark-current correction with a random dark image
dark = numpy.random.random(data.shape)
out_cyt = ai._lut_integrator.integrate(data, dark=dark)
out_ocl = gpu.integrate(data, dark=dark)[0]
print("Dark  R= %s" % (utilstest.Rwp(
    (out_cyt[0], out_ocl), out_cyt[:2], "dark"), ))

# Flat-field correction with a constant flat of 2
flat = 2 * numpy.ones_like(data)
out_cyt = ai._lut_integrator.integrate(data, flat=flat)
out_ocl = gpu.integrate(data, flat=flat)[0]