Python Metal.MTLCreateSystemDefaultDevice示例

编程语言: Python

类/类型: Metal

方法/功能: MTLCreateSystemDefaultDevice

hotexamples.com的示例: 3

Python Metal.MTLCreateSystemDefaultDevice - 已找到3个示例。这些是从开源项目中提取的最受好评的Metal.MTLCreateSystemDefaultDevice 来自程序包 raytrace_weekend_numpy现实Python示例。您可以评价示例，以帮助我们提高示例质量。

常用方法

显示隐藏

Metal(5)

MTLCreateSystemDefaultDevice(3)

MTLIndirectCommandBufferExecutionRange(2)

MTLSize(2)

MTLSizeAndAlign(2)

MTLSizeMake(2)

MTLTextureSwizzleChannels(1)

MTLTextureSwizzleChannelsMake(1)

MTLStageInRegionIndirectArguments(1)

MTLRegionMake1D(1)

MTLTriangleTessellationFactorsHalf(1)

MTLVertexAmplificationViewMapping(1)

MTLViewport(1)

MTLScissorRect(1)

MTLSamplePositionMake(1)

MTLSamplePosition(1)

MTLRegionMake3D(1)

MTLRegionMake2D(1)

MTLAccelerationStructureSizes(1)

MTLRegion(1)

MTLClearColor(1)

MTLOriginMake(1)

MTLOrigin(1)

MTLMapIndirectArguments(1)

MTLIndirectCommandBufferExecutionRangeMake(1)

MTLDrawPrimitivesIndirectArguments(1)

MTLDrawPatchIndirectArguments(1)

MTLDrawIndexedPrimitivesIndirectArguments(1)

MTLDispatchThreadgroupsIndirectArguments(1)

MTLCounterResultTimestamp(1)

MTLCounterResultStatistic(1)

MTLCounterResultStageUtilization(1)

MTLCoordinate2DMake(1)

MTLClearColorMake(1)

MTLQuadTessellationFactorsHalf(1)

示例#1

显示文件

文件： imageconverter.py 项目： lgxz/osxphotos

    def __init__(self):
        """ return existing singleton or create a new one """

        if hasattr(self, "context"):
            return
        """ initialize CIContext """
        context_options = NSDictionary.dictionaryWithDictionary_({
            "workingColorSpace":
            Quartz.CoreGraphics.kCGColorSpaceExtendedSRGB,
            "workingFormat":
            Quartz.kCIFormatRGBAh,
        })
        mtldevice = Metal.MTLCreateSystemDefaultDevice()
        self.context = Quartz.CIContext.contextWithMTLDevice_options_(
            mtldevice, context_options)

示例#2

显示文件

文件： ops_metal.py 项目： ycechungAI/tinygrad

# pip3 install pyobjc-framework-MetalPerformanceShaders
from tinygrad.tensor import Function
from tinygrad.helpers import binary_broadcast
import numpy as np
import Metal
import MetalPerformanceShaders

device = Metal.MTLCreateSystemDefaultDevice()
mtl_queue = device.newCommandQueue()
mtl_buffers = []


def cmd_buffer():
    ret = mtl_queue.commandBuffer()
    mtl_buffers.append(ret)
    return ret


class MetalBuffer:
    def __init__(self, shape, hostbuf=None):
        self.sz = np.prod(shape) * 4
        # TODO: fix this limit
        assert self.sz < 16384
        if hostbuf is not None:
            if isinstance(hostbuf, MetalBuffer):
                self.mtl = hostbuf.mtl
            else:
                self.mtl = device.newBufferWithBytes_length_options_(
                    hostbuf.astype(np.float32).data, self.sz,
                    Metal.MTLResourceStorageModeShared)
        else:

示例#3

显示文件

    def compile(self):
        try:
            import Metal
        except ImportError:
            logging.error("Failure to import Metal, please install pyobjc-framework-Metal")
            raise
        import objc
        dtype_out = vaex.dtype(self.return_dtype).numpy
        if dtype_out.name == "float64":
            dtype_out = np.dtype("float32")
            warnings.warn("Casting output from float64 to float32 since Metal does not support float64")
        ast_node = expresso.parse_expression(self.expression)
        cppcode = node_to_cpp(ast_node)
        typemap = {'float32': 'float',
                   'float64': 'float'}  # we downcast!
        for name in vaex.array_types._type_names_int:
            typemap[name] = f'{name}_t'
        typenames = [typemap[dtype.name] for dtype in self.argument_dtypes]
        metal_args = [f'const device {typename} *{name}_array [[buffer({i})]]' for i, (typename, name) in
                      enumerate(zip(typenames, self.arguments))]
        code_get_scalar = [f'    {typename} {name} = {name}_array[id];\n' for typename, name, in zip(typenames, self.arguments)]
        sourcecode = '''
#include <metal_stdlib>
using namespace metal;

float arctan2(float y, float x) {
    return atan2(y, x);
}

template<typename T>
T where(bool condition, T y, T x) {
    return condition ? x : y;
}
kernel void vaex_kernel(%s,
                        device %s *vaex_output [[buffer(%i)]],
                        uint id [[thread_position_in_grid]]) {
%s
    vaex_output[id] = %s;
}
''' % (', '.join(metal_args), typemap[dtype_out.name], len(metal_args), ''.join(code_get_scalar), cppcode) # typemap[self.dtype_out],
        if self.verbose:
            print('Generated code:\n' + sourcecode)
        with open('test.metal', 'w') as f:
            print(f'Write to {f.name}')
            f.write(sourcecode)


        storage = threading.local()
        lock = threading.Lock()

        # following https://developer.apple.com/documentation/metal/basic_tasks_and_concepts/performing_calculations_on_a_gpu?language=objc
        self.device = Metal.MTLCreateSystemDefaultDevice()
        opts = Metal.MTLCompileOptions.new()
        self.library = self.device.newLibraryWithSource_options_error_(sourcecode, opts, objc.NULL)
        if self.library[0] is None:
            msg = f"Error compiling: {sourcecode}, sourcecode"
            logger.error(msg)
            raise RuntimeError(msg)
        kernel_name = "vaex_kernel"
        self.vaex_kernel = self.library[0].newFunctionWithName_(kernel_name)
        desc = Metal.MTLComputePipelineDescriptor.new()
        desc.setComputeFunction_(self.vaex_kernel)
        state = self.device.newComputePipelineStateWithDescriptor_error_(desc, objc.NULL)
        command_queue = self.device.newCommandQueue()

        def wrapper(*args):
            args = [vaex.array_types.to_numpy(ar) for ar in args]
            def getbuf(name, value=None, dtype=np.dtype("float32"), N=None):
                buf = getattr(storage, name, None)
                if value is not None:
                    N = len(value)
                    dtype = value.dtype
                if dtype.name == "float64":
                    warnings.warn("Casting input argument from float64 to float32 since Metal does not support float64")
                    dtype = np.dtype("float32")
                nbytes = N * dtype.itemsize
                if buf is not None and buf.length() != nbytes:
                    # doesn't match size, create a new one
                    buf = None
                # create a buffer
                if buf is None:
                    buf = self.device.newBufferWithLength_options_(nbytes, 0)
                    setattr(storage, name, buf)
                # copy data to buffer
                if value is not None:
                    mv = buf.contents().as_buffer(buf.length())
                    buf_as_numpy = np.frombuffer(mv, dtype=dtype)
                    buf_as_numpy[:] = value.astype(dtype, copy=False)
                return buf
            input_buffers = [getbuf(name, chunk) for name, chunk in zip(self.arguments, args)]
            output_buffer = getbuf('vaex_output', N=len(args[0]), dtype=dtype_out)
            buffers = input_buffers + [output_buffer]
            command_buffer = command_queue.commandBuffer()
            encoder = command_buffer.computeCommandEncoder()
            encoder.setComputePipelineState_(state)
            for i, buf in enumerate(buffers):
                encoder.setBuffer_offset_atIndex_(buf, 0, i)
            nitems = len(args[0])
            tpgrid = Metal.MTLSize(width=nitems, height=1, depth=1)
            # state.threadExecutionWidth() == 32 on M1 max
            # state.maxTotalThreadsPerThreadgroup() == 1024 on M1 max
            tptgroup = Metal.MTLSize(width=state.threadExecutionWidth(), height=state.maxTotalThreadsPerThreadgroup()//state.threadExecutionWidth(), depth=1)
            # this is simpler, and gives the same performance
            # tptgroup = Metal.MTLSize(width=1, height=1, depth=1)
            encoder.dispatchThreads_threadsPerThreadgroup_(tpgrid, tptgroup)
            encoder.endEncoding()
            command_buffer.commit()
            command_buffer.waitUntilCompleted()

            output_buffer_py = output_buffer.contents().as_buffer(output_buffer.length())
             # do we needs .copy() ?
            result = np.frombuffer(output_buffer_py, dtype=dtype_out)
            return result
        return wrapper