示例#1
0
def elementwise_add(op: Sgemm) -> List[Kernel]:
    """Build the single kernel that implements an ``Sgemm`` operator.

    The operator's ``A`` and ``B`` inputs and ``C`` output are looked up,
    the values the shader template refers to are registered with a
    ``UniformInjector``, and the source produced by ``generate_template``
    is specialised by the uniform and kernel-name injectors.

    Args:
        op: Operator exposing ``inputs``, ``outputs``, ``M``, ``N``, ``K``,
            ``transpose_A`` and ``transpose_B``.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``A`` and ``B`` do not share a channel mode.
    """
    lhs = op.inputs["A"]
    rhs = op.inputs["B"]
    out = op.outputs["C"]

    # Only one template is generated (from A's mode), so both inputs must
    # use the same channel mode.
    assert ChannelMode.get_mode(lhs) == ChannelMode.get_mode(rhs)

    name_injector = KernelNameInjector(op)
    uniform_injector = UniformInjector()

    # Logical strides of A and B depend on the operator's transpose flags.
    if op.transpose_A:
        logical_stride_a = [op.K, 1]
    else:
        logical_stride_a = [1, op.M]

    if op.transpose_B:
        logical_stride_b = [op.N, 1]
    else:
        logical_stride_b = [1, op.K]

    bindings = {
        "A": lhs,
        "B": rhs,
        "s_c": texture_stride(out),
        "d_C": [op.M, op.N],
        "s_C": [op.N, 1],
        "d_a": texture_shape(lhs),
        "s_a": texture_stride(lhs),
        "s_A": logical_stride_a,
        "d_b": texture_shape(rhs),
        "s_b": texture_stride(rhs),
        "s_B": logical_stride_b,
        "K": op.K,
    }
    uniform_injector.register(bindings)

    # Uniforms are injected first, then the kernel name.
    template = generate_template(mode=ChannelMode.get_mode(lhs), K=op.K)
    source = name_injector.inject(uniform_injector.inject(template))

    return [
        Kernel(source, name_injector.name, uniform_injector.samplers,
               uniform_injector.uniforms, out)
    ]
示例#2
0
def elementwise_add(op: Softsign) -> List[Kernel]:
    """Build the single kernel that implements a ``Softsign`` operator.

    The input ``x0`` and output ``y`` are passed through
    ``optimize_loop_structure`` to obtain the shapes and strides registered
    as uniforms; the template is chosen from the output's channel mode.

    Args:
        op: Operator exposing ``inputs["x0"]`` and ``outputs["y"]``.

    Returns:
        A one-element list containing the generated kernel.
    """
    src = op.inputs["x0"]
    dst = op.outputs["y"]

    loop_shapes, loop_strides = optimize_loop_structure([src, dst], dst)

    name_injector = KernelNameInjector(op)
    uniform_injector = UniformInjector()

    bindings = {
        "X0": src,
        "s_y": texture_stride(dst),
        "d_Y": loop_shapes[dst],
        "s_Y": loop_strides[dst],
        "d_x0": texture_shape(src),
        "s_x0": texture_stride(src),
        "d_X0": loop_shapes[src],
        "s_X0": loop_strides[src],
    }
    uniform_injector.register(bindings)

    # The output's channel mode decides which template is used.
    if ChannelMode.get_mode(dst) == ChannelModeEnum.R:
        template = template_R
    else:
        template = template_RGBA

    # Uniforms are injected first, then the kernel name.
    source = name_injector.inject(uniform_injector.inject(template))

    return [
        Kernel(source, name_injector.name, uniform_injector.samplers,
               uniform_injector.uniforms, dst)
    ]
示例#3
0
def texture_shape(v: Variable):
    """Return the two-entry texture shape used to store ``v``.

    The required length is ``v.size`` in ``R`` mode and ``v.size`` divided
    by four, rounded up, in ``RGBA`` mode. That length is then split
    against a limit of 2048 (presumably the maximum texture extent per
    axis -- TODO confirm).

    Args:
        v: Variable whose channel mode and ``size`` determine the shape.

    Returns:
        ``[first, second]`` where ``first`` is the length capped at 2048 and
        ``second`` is the length divided by 2048, rounded up.

    Raises:
        NotImplementedError: If the channel mode is neither ``R`` nor
            ``RGBA``.
    """
    channel_mode = ChannelMode.get_mode(v)

    if channel_mode == ChannelModeEnum.R:
        length = v.size
    elif channel_mode == ChannelModeEnum.RGBA:
        # Ceiling division by four.
        length = (v.size + 4 - 1) // 4
    else:
        raise NotImplementedError(f"Unknown channel mode: {channel_mode}")

    # Cap the first extent at the limit.
    if length < 2048:
        first_extent = length
    else:
        first_extent = 2048

    # Ceiling division: how many limit-sized chunks cover the length.
    second_extent = (length + 2048 - 1) // 2048

    return [first_extent, second_extent]
示例#4
0
def texture_stride(v: Variable):
    """Return the stride of each texture axis of ``v``.

    The first stride is 1 in ``R`` mode and 4 in ``RGBA`` mode; each
    following stride is the previous one multiplied by the corresponding
    extent returned by ``texture_shape``.

    Args:
        v: Variable whose channel mode and texture shape are used.

    Returns:
        A list with one stride per entry of ``texture_shape(v)``.

    Raises:
        NotImplementedError: If the channel mode is neither ``R`` nor
            ``RGBA``.
    """
    channel_mode = ChannelMode.get_mode(v)

    if channel_mode == ChannelModeEnum.R:
        step = 1
    elif channel_mode == ChannelModeEnum.RGBA:
        step = 4
    else:
        raise NotImplementedError(f"Unknown channel mode: {channel_mode}")

    strides = []
    for extent in texture_shape(v):
        # Record the current step, then widen it by this axis' extent.
        strides.append(step)
        step *= extent

    return strides
示例#5
0
def elementwise_add(op: Im2Col) -> List[Kernel]:
    """Build the single kernel that implements an ``Im2Col`` operator.

    The input ``im`` and output ``col`` are checked for supported orders,
    their texture and logical shapes/strides plus the operator's ``KH``,
    ``KW``, ``DH``, ``DW``, ``SH``, ``SW``, ``PH`` and ``PW`` attributes
    are registered as uniforms, and the template is chosen from the
    output's channel mode.

    Args:
        op: Operator exposing ``inputs["im"]``, ``outputs["col"]`` and the
            attributes listed above.

    Returns:
        A one-element list containing the generated kernel.

    Raises:
        AssertionError: If ``im`` is not ``OrderNHWC`` or ``col`` is neither
            ``OrderNHWC`` nor ``OrderCNHW``.
    """
    image = op.inputs["im"]
    column = op.outputs["col"]

    assert image.order == OrderNHWC
    assert column.order == OrderNHWC or column.order == OrderCNHW

    name_injector = KernelNameInjector(op)
    uniform_injector = UniformInjector()

    bindings = {
        "im": image,
        "s_col": texture_stride(column),
        "d_Col": column.shape,
        "s_Col": column.stride,
        "d_im": texture_shape(image),
        "s_im": texture_stride(image),
        "d_Im": image.shape,
        "s_Im": image.stride,
        "C1": image.shape_dict[Axis.C],
        "H1": image.shape_dict[Axis.H],
        "W1": image.shape_dict[Axis.W],
        "KH": op.KH,
        "KW": op.KW,
        "DH": op.DH,
        "DW": op.DW,
        "SH": op.SH,
        "SW": op.SW,
        "PH": op.PH,
        "PW": op.PW,
    }
    uniform_injector.register(bindings)

    # The output's channel mode decides which template is used.
    if ChannelMode.get_mode(column) == ChannelModeEnum.R:
        template = template_R
    else:
        template = template_RGBA

    # Uniforms are injected first, then the kernel name.
    source = name_injector.inject(uniform_injector.inject(template))

    return [
        Kernel(source, name_injector.name, uniform_injector.samplers,
               uniform_injector.uniforms, column)
    ]
示例#6
0
    def generate(cls, graph: Graph, **kwargs):
        """Optimize ``graph`` and assemble everything needed to execute it.

        Steps, in order: apply ``WebGLOptimizeRule``; when ``flags.DEBUG``
        is set, dump the graph and write its dot representation to
        ``cg.dot``; allocate memory; wrap each allocation together with its
        variable's channel mode; record offset and size of each constant;
        encode the constants; generate the kernels; bundle the results.

        Args:
            graph: Graph to generate execution data for.
            **kwargs: ``constant_encoder_name`` (optional) is passed to
                ``ConstantEncoder.get_encoder``; ``None`` is passed when
                it is absent.

        Returns:
            A ``GraphExecutionData`` built from the optimized graph, its
            descriptor and the encoded constant bytes.
        """
        graph, _ = WebGLOptimizeRule().optimize(graph)

        if flags.DEBUG:
            traverse.dump(graph)
            with open("cg.dot", "w") as dot_file:
                dot_file.write(traverse.dump_dot(graph))

        memory_layout = allocate(graph)

        # Pair every allocation with the channel mode of its variable.
        allocations = {
            variable: WebGLAllocation(
                allocation=allocation,
                channel_mode=ChannelMode.get_mode(variable))
            for variable, allocation in memory_layout.allocations.items()
        }

        # Offsets are multiplied by 4 to give a byte offset (presumably
        # 4-byte elements -- TODO confirm).
        constant_nodes = traverse.filter_nodes(
            traverse.listup_nodes(graph), ConstantVariable)
        constants_map = {
            constant.name: {
                "byte_offset": memory_layout[constant].offset * 4,
                "size": constant.size
            }
            for constant in constant_nodes
        }

        encoder_name = kwargs.get("constant_encoder_name")
        constant_encoder = ConstantEncoder.get_encoder(encoder_name)
        constants_bytes = constant_encoder.encode(memory_layout)

        kernels = cls.generate_kernels(graph)

        descriptor = GraphDescriptor(
            kernels=kernels,
            memory_layout=memory_layout,
            inputs=graph.inputs,
            outputs=graph.outputs,
            constants_encoding=constant_encoder.name,
            allocations=allocations,
            constants_map=constants_map,
            licenses=graph.licenses)

        return GraphExecutionData(graph, descriptor, constants_bytes)