def maximum_filter_2d(arr2D, footprint):
    """Run the sliding-maximum CUDA kernel over ``arr2D`` and return the result.

    The kernel source is read from ``2DSlidingMaxFootprintKernel.c`` in the
    same directory as this file, compiled with ``SourceModule`` and launched
    with 16x16 thread blocks over a grid computed by ``getGridSize``.

    Args:
        arr2D: Input array; ``arr2D.shape[0]`` is passed to the kernel as the
            row count and ``arr2D.shape[1]`` as the column count.
            NOTE(review): the original author's note says this must be
            float32; nothing here checks or converts it.
        footprint: Footprint array handed to the kernel; ``footprint.shape[1]``
            is passed as the kernel size.
            NOTE(review): the original author's note says this must be
            int32; nothing here checks or converts it.

    Returns:
        An array allocated with ``numpy.empty_like(arr2D)`` that the kernel
        writes into (passed as ``cuda.Out``).
    """
    # The output buffer has the same shape and dtype as the input.
    arr2DMaxed = numpy.empty_like(arr2D)

    # The kernel source is looked up next to this file so that it is found
    # regardless of the current working directory.
    head = os.path.dirname(os.path.abspath(__file__))
    kernelPath = head + "/2DSlidingMaxFootprintKernel.c"

    # Read the source inside a context manager so the file handle is closed
    # even if reading fails.
    with open(kernelPath, "rt") as kernelFile:
        kernelSource = kernelFile.read()

    maxFunction = SourceModule(kernelSource)
    # The entry-point name must match the spelling used in the kernel source
    # file exactly, so it is left as-is.
    slidingMaxKernel = maxFunction.get_function("slidingMaxiumum2D")

    # To-do: make the block size configurable; it can affect performance
    # depending on the GPU.
    blockSize = [16, 16]
    # Number of blocks per grid dimension, derived from the block size and
    # the array shape.
    gridSize = getGridSize(blockSize, arr2D.shape)

    # NOTE(review): the row stride is given as the column count and the column
    # stride as 1, i.e. strides in elements of a row-major contiguous array --
    # confirm against the kernel and the callers.
    slidingMaxKernel(
        cuda.In(arr2D),                     # Input
        cuda.Out(arr2DMaxed),               # Output
        numpy.int32(footprint.shape[1]),    # Kernel size
        numpy.int32(arr2D.shape[1]),        # Row stride
        numpy.int32(1),                     # Column stride
        numpy.int32(int(arr2D.shape[1])),   # Array column count
        numpy.int32(int(arr2D.shape[0])),   # Array row count
        cuda.In(footprint),                 # Footprint
        block=(blockSize[0], blockSize[1], 1),
        grid=(gridSize[0], gridSize[1], 1),
    )

    return arr2DMaxed