def depth2space(op: Depth2Space) -> List[Kernel]:
    """Generate the fragment-shader kernel for a ``Depth2Space`` operator.

    For every output element ``(n, h2, w2, c2)`` the emitted shader copies the
    input element ``(n, h1, w1, c1)`` with ``h1 = h2 / r``, ``w1 = w2 / r`` and
    ``c1 = c2 + (w2 - w1*r)*C2 + (h2 - h1*r)*C2*r``, where ``r`` is
    ``op.parameters['r']`` and ``C2`` is the channel size of the output.

    Args:
        op: Operator exposing input ``"x"``, output ``"y"`` and parameter ``'r'``.

    Returns:
        A one-element list containing the kernel that writes ``y``.

    Raises:
        AssertionError: If ``x`` or ``y`` does not have the NHWC axes, or if
            either variable's channel mode is not ``ChannelModeEnum.R``.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]
    block = op.parameters['r']
    out_channels = dst.shape_dict[Axis.C]

    for variable in (src, dst):
        assert variable.order.check_same_axes(OrderNHWC)
    for variable in (src, dst):
        assert ChannelMode.get(variable) == ChannelModeEnum.R

    fragments = [
        " void main() { ivec4 variable_position_y = ",
        # Output position re-expressed in NHWC so .x/.y/.z/.w are n/h/w/c.
        change_order(get_output_position(dst), dst.order, OrderNHWC),
        (
            "; int n = variable_position_y.x; "
            "int h2 = variable_position_y.y; "
            "int w2 = variable_position_y.z; "
            "int c2 = variable_position_y.w; "
            f"int h1 = h2 / {block}; "
            f"int w1 = w2 / {block}; "
            f"int c1 = c2 + (w2-w1*{block})*{out_channels} + (h2-h1*{block})*{out_channels}*{block}; "
            "gl_FragColor.r = "
        ),
        texel_fetch(src, change_order("vec4(n, h1, w1, c1)", OrderNHWC, src.order)),
        ".r; } ",
    ]
    code = KernelCode(fragments, name=op.__class__.__name__)

    kernel = Kernel(code.generate(), code.name, code.samplers, code.uniforms, dst)
    return [kernel]
def average_pooling_2d(op: AveragePooling2D) -> List[Kernel]:
    """Generate the fragment-shader kernel for an ``AveragePooling2D`` operator.

    The emitted shader walks the ``KH x KW`` window of each output element,
    skips window positions that fall outside the input's H/W extent, sums the
    remaining input values and divides the sum.

    The divisor depends on ``op.parameters["divide_without_padding"]``:

    * truthy: a shader-side counter that starts at ``1e-8`` and is incremented
      once per in-bounds sample;
    * falsy: the constant ``float(op.ksize[0] * op.ksize[1])``.

    Args:
        op: Operator exposing input ``"x"``, output ``"y"``, the window/stride/
            padding attributes (``KH``, ``KW``, ``SH``, ``SW``, ``PH``, ``PW``,
            ``ksize``) and parameter ``"divide_without_padding"``.

    Returns:
        A one-element list containing the kernel that writes ``y``.

    Raises:
        AssertionError: If ``x`` or ``y`` does not have the NHWC axes, or if
            either variable's channel mode is not ``ChannelModeEnum.R``.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    for variable in (src, dst):
        assert variable.order.check_same_axes(OrderNHWC)
    for variable in (src, dst):
        assert ChannelMode.get(variable) == ChannelModeEnum.R

    # Three GLSL snippets: counter declaration, counter increment, divisor expression.
    if op.parameters["divide_without_padding"]:
        declare_count, bump_count, denominator = "float divider = 1e-8;", "divider += 1.0;", "divider"
    else:
        declare_count, bump_count, denominator = "", "", str(float(op.ksize[0] * op.ksize[1]))

    fragments = [
        " void main() { ivec4 variable_position_y = ",
        change_order(get_output_position(dst), dst.order, OrderNHWC),
        (
            "; int n = variable_position_y.x; "
            "int h2 = variable_position_y.y; "
            "int w2 = variable_position_y.z; "
            "int c = variable_position_y.w; "
            "float sum = 0.0; "
            f"{declare_count} "
            f"for (int kh = 0; kh < {op.KH}; kh++) {{ "
            f"int h1 = h2 * {op.SH} - {op.PH} + kh; "
            f"if (h1 < 0 || h1 >= {src.shape_dict[Axis.H]}) continue; "
            f"for (int kw = 0; kw < {op.KW}; kw++) {{ "
            f"int w1 = w2 * {op.SW} - {op.PW} + kw; "
            f"if (w1 < 0 || w1 >= {src.shape_dict[Axis.W]}) continue; "
            "sum += "
        ),
        texel_fetch(src, change_order("vec4(n, h1, w1, c)", OrderNHWC, src.order)),
        f".r; {bump_count} }} }} gl_FragColor.r = sum / {denominator}; }} ",
    ]
    code = KernelCode(fragments, name=op.__class__.__name__)

    kernel = Kernel(code.generate(), code.name, code.samplers, code.uniforms, dst)
    return [kernel]
def max_pooling_2d(op: MaxPooling2D) -> List[Kernel]:
    """Generate the fragment-shader kernel for a ``MaxPooling2D`` operator.

    The emitted shader walks the ``KH x KW`` window of each output element,
    skips window positions outside the input's H/W extent, and keeps the
    running maximum of the remaining input values.

    Args:
        op: Operator exposing input ``"x"``, output ``"y"`` and the window/
            stride/padding attributes ``KH``, ``KW``, ``SH``, ``SW``, ``PH``, ``PW``.

    Returns:
        A one-element list containing the kernel that writes ``y``.

    Raises:
        AssertionError: If ``x`` or ``y`` does not have the NHWC axes, or if
            either variable's channel mode is not ``ChannelModeEnum.R``.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    for variable in (src, dst):
        assert variable.order.check_same_axes(OrderNHWC)
    for variable in (src, dst):
        assert ChannelMode.get(variable) == ChannelModeEnum.R

    # NOTE(review): the running maximum is seeded with -1e5, so an output whose
    # window holds only values below -1e5 (or no in-bounds sample at all) comes
    # out as -1e5 -- confirm this floor is acceptable for the expected inputs.
    fragments = [
        " void main() { ivec4 variable_position_y = ",
        change_order(get_output_position(dst), dst.order, OrderNHWC),
        (
            "; int n = variable_position_y.x; "
            "int h2 = variable_position_y.y; "
            "int w2 = variable_position_y.z; "
            "int c = variable_position_y.w; "
            "float v = -1e5; "
            f"for (int kh = 0; kh < {op.KH}; kh++) {{ "
            f"int h1 = h2 * {op.SH} - {op.PH} + kh; "
            f"if (h1 < 0 || h1 >= {src.shape_dict[Axis.H]}) continue; "
            f"for (int kw = 0; kw < {op.KW}; kw++) {{ "
            f"int w1 = w2 * {op.SW} - {op.PW} + kw; "
            f"if (w1 < 0 || w1 >= {src.shape_dict[Axis.W]}) continue; "
            "v = max("
        ),
        texel_fetch(src, change_order("vec4(n, h1, w1, c)", OrderNHWC, src.order)),
        ".r, v); } } gl_FragColor.r = v; } ",
    ]
    code = KernelCode(fragments, name=op.__class__.__name__)

    kernel = Kernel(code.generate(), code.name, code.samplers, code.uniforms, dst)
    return [kernel]
def average_pooling_2d(op: Unpooling2D) -> List[Kernel]:
    """Generate the fragment-shader kernel for an ``Unpooling2D`` operator.

    For each output element ``(n, h2, w2, c)`` and each window offset
    ``(kh, kw)``, the emitted shader computes ``h1 = h2 + PH - kh`` and
    ``w1 = w2 + PW - kw``. An offset contributes only when ``h1``/``w1`` is
    non-negative, below ``H*SH``/``W*SW`` of the input, and an exact multiple
    of the stride. The input value at ``(n, h1/SH, w1/SW, c)`` is then added
    to the output sum.

    NOTE(review): the function is named ``average_pooling_2d`` although it is
    annotated with ``Unpooling2D``; a function of the same name taking
    ``AveragePooling2D`` also appears in this file. If both are defined in the
    same module the later definition replaces the earlier one -- confirm how
    these generators are registered before renaming.

    Args:
        op: Operator exposing input ``"x"``, output ``"y"`` and the window/
            stride/padding attributes ``KH``, ``KW``, ``SH``, ``SW``, ``PH``, ``PW``.

    Returns:
        A one-element list containing the kernel that writes ``y``.

    Raises:
        AssertionError: If ``x`` or ``y`` does not have the NHWC axes.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    for variable in (src, dst):
        assert variable.order.check_same_axes(OrderNHWC)

    fragments = [
        " void main() { ivec4 variable_position_y = ",
        change_order(get_output_position(dst), dst.order, OrderNHWC),
        (
            "; int n = variable_position_y.x; "
            "int h2 = variable_position_y.y; "
            "int w2 = variable_position_y.z; "
            "int c = variable_position_y.w; "
            "float sum = 0.0; "
            f"for (int kh = 0; kh < {op.KH}; kh++) {{ "
            f"int h1 = h2 + {op.PH} - kh; "
            f"if (h1 < 0 || h1 >= {src.shape_dict[Axis.H]} * {op.SH}) continue; "
            f"if (mod(h1, {op.SH}) != 0) continue; "
            f"h1 /= {op.SH}; "
            f"for (int kw = 0; kw < {op.KW}; kw++) {{ "
            f"int w1 = w2 + {op.PW} - kw; "
            f"if (w1 < 0 || w1 >= {src.shape_dict[Axis.W]} * {op.SW}) continue; "
            f"if (mod(w1, {op.SW}) != 0) continue; "
            f"w1 /= {op.SW}; "
            "sum += "
        ),
        texel_fetch(src, change_order("vec4(n, h1, w1, c)", OrderNHWC, src.order)),
        ".r; } } gl_FragColor.r = sum; } ",
    ]
    code = KernelCode(fragments, name=op.__class__.__name__)

    kernel = Kernel(code.generate(), code.name, code.samplers, code.uniforms, dst)
    return [kernel]
def space2depth(op: Space2Depth) -> List[Kernel]:
    """Generate the fragment-shader kernel for a ``Space2Depth`` operator.

    For every output element ``(n, h2, w2, c2)`` the emitted shader copies the
    input element ``(n, h1, w1, c1)`` with ``c1 = mod(c2, C1)``,
    ``h1 = h2*r + c2/C1/r`` and ``w1 = w2*r + mod(c2/C1, r)``, where ``r`` is
    ``op.parameters['r']`` and ``C1`` is the channel size of the input.

    Unlike a reorder to NHWC, the shader indexes the position vectors with the
    per-axis indices taken from ``y.order.axes_dict`` / ``x.order.axes_dict``.

    Args:
        op: Operator exposing input ``"x"``, output ``"y"`` and parameter ``'r'``.

    Returns:
        A one-element list containing the kernel that writes ``y``.

    Raises:
        AssertionError: If ``x`` or ``y`` does not have the NHWC axes, or if
            either variable's channel mode is not ``ChannelModeEnum.R``.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]
    block = op.parameters['r']
    in_channels = src.shape_dict[Axis.C]

    for variable in (src, dst):
        assert variable.order.check_same_axes(OrderNHWC)
    for variable in (src, dst):
        assert ChannelMode.get(variable) == ChannelModeEnum.R

    # "int <name> = variable_position_y[<index of axis in y.order>];" for each axis.
    read_output_index = " ".join(
        f"int {name} = variable_position_y[{dst.order.axes_dict[axis]}];"
        for name, axis in (("n", Axis.N), ("h2", Axis.H), ("w2", Axis.W), ("c2", Axis.C))
    )
    # "variable_position_x[<index of axis in x.order>] = <name>;" for each axis.
    write_input_index = " ".join(
        f"variable_position_x[{src.order.axes_dict[axis]}] = {name};"
        for name, axis in (("n", Axis.N), ("h1", Axis.H), ("w1", Axis.W), ("c1", Axis.C))
    )

    fragments = [
        " void main() { ivec4 variable_position_y = ",
        get_output_position(dst),
        (
            f"; {read_output_index} "
            f"int c1 = mod(c2, {in_channels}); "
            f"int h1 = h2 * {block} + c2 / {in_channels} / {block}; "
            f"int w1 = w2 * {block} + mod(c2 / {in_channels}, {block}); "
            "ivec4 variable_position_x; "
            f"{write_input_index} "
            "gl_FragColor.r = "
        ),
        texel_fetch(src, "variable_position_x"),
        ".r; } ",
    ]
    code = KernelCode(fragments, name=op.__class__.__name__)

    kernel = Kernel(code.generate(), code.name, code.samplers, code.uniforms, dst)
    return [kernel]
def reshape(op: Tile) -> List[Kernel]:
    """Generate the fragment-shader kernel for a ``Tile`` operator.

    Each output element reads the input element found by taking the output
    position modulo the input's shape (listed in the output's axis order) and
    then converting that position from ``y.order`` to ``x.order``.

    NOTE(review): the function is named ``reshape`` although its parameter is
    annotated with ``Tile`` and the body implements a modulo lookup -- confirm
    the intended name before relying on it.

    Args:
        op: Operator exposing input ``"x"`` and output ``"y"``.

    Returns:
        A one-element list containing the kernel that writes ``y``.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    # y -{broadcast}-> x_position_in_y_order -{change_order}-> x
    wrapped_position = ExpressionNode([
        "mod(",
        get_output_position(dst),
        ", ",
        ivec([src.shape_dict[axis] for axis in dst.order.axes]),
        ")",
    ])
    source_position = change_order(wrapped_position, dst.order, src.order)

    fragments = [
        " void main() { gl_FragColor.r = ",
        texel_fetch(src, source_position),
        ".r; } ",
    ]
    code = KernelCode(fragments, name=op.__class__.__name__)

    kernel = Kernel(code.generate(), code.name, code.samplers, code.uniforms, dst)
    return [kernel]
def reinterpret_axis(op: ReinterpretAxis) -> List[Kernel]:
    """Generate the fragment-shader kernel for a ``ReinterpretAxis`` operator.

    Every axis of ``x.order`` is translated to the axis that sits at the same
    index in ``op.out_order`` as the original axis does in ``op.in_order``.
    The emitted shader converts the output position from ``y.order`` into that
    translated order and fetches the input element at the result.

    Args:
        op: Operator exposing input ``"x"``, output ``"y"`` and the orders
            ``in_order`` / ``out_order``.

    Returns:
        A one-element list containing the kernel that writes ``y``.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    renamed_axes = []
    for axis in src.order.axes:
        slot = op.in_order.axes_dict[axis]
        renamed_axes.append(op.out_order.axes[slot])
    y_axes_order_in_x_order = Order(renamed_axes)

    # FIXME: optimize
    fragments = [
        " void main() { gl_FragColor.r = ",
        texel_fetch(src, change_order(get_output_position(dst), dst.order, y_axes_order_in_x_order)),
        ".r; } ",
    ]
    code = KernelCode(fragments, name=op.__class__.__name__)

    kernel = Kernel(code.generate(), code.name, code.samplers, code.uniforms, dst)
    return [kernel]
def im2col(op: Im2Col) -> List[Kernel]:
    """Generate the fragment-shader kernel for an ``Im2Col`` operator.

    The column variable is addressed as a 4-axis tensor whose last axis merges
    ``KH``, ``KW`` and ``C``. For each column element the emitted shader splits
    that merged index into ``kh``, ``kw`` and ``c1``, then computes the image
    position ``h1 = h2*SH - PH + kh*DH`` and ``w1 = w2*SW - PW + kw*DW``.
    If that position is outside the image it writes zero; otherwise it writes
    the image value.

    Two variants exist, selected by the channel mode of ``col``:

    * ``R``: one value is written to ``gl_FragColor.r`` (kernel name ``"Im2Col_R"``).
    * ``RGBA``: the values at channels ``c1 + 0 .. c1 + 3`` are written to the
      r/g/b/a components (kernel name ``"Im2Col_RGBA"``).

    Args:
        op: Operator exposing input ``"im"``, output ``"col"`` and the
            attributes ``KW``, ``SH``, ``SW``, ``PH``, ``PW``, ``DH``, ``DW``.

    Returns:
        A one-element list containing the kernel that writes ``col``.

    Raises:
        AssertionError: If ``col`` does not have the N/H/W/KH/KW/C axes with
            KH, KW, C at indices 3, 4, 5; if ``im`` does not have the NHWC
            axes; or if the channel mode of ``im`` is not ``ChannelModeEnum.R``.
        NotImplementedError: If the channel mode of ``col`` is neither ``R``
            nor ``RGBA``.
    """
    image = op.inputs["im"]
    column = op.outputs["col"]

    in_h = image.shape_dict[Axis.H]
    in_w = image.shape_dict[Axis.W]
    in_c = image.shape_dict[Axis.C]

    assert column.order.check_same_axes(Order([Axis.N, Axis.H, Axis.W, Axis.KH, Axis.KW, Axis.C]))
    # KH, KW, C must be the three trailing axes, in that order.
    assert column.order.axes_dict[Axis.KH] + 2 == column.order.axes_dict[Axis.KW] + 1 == column.order.axes_dict[Axis.C] == 5
    assert image.order.check_same_axes(OrderNHWC)
    assert ChannelMode.get(image) == ChannelModeEnum.R

    # View the column as 4 axes: the three leading ones plus the merged (KH, KW, C) axis.
    col_shape = column.shape[0:3] + (mul(column.shape[3:6]), )
    col_stride = [mul(col_shape[start:]) for start in range(1, len(col_shape) + 1)]
    col_order = Order(column.order.axes[0:3] + (Axis.C, ))

    def column_position():
        # Fragment coordinate -> position in the 4-axis column view, reordered to NHWC.
        return change_order(
            convert_position("gl_FragCoord.yx", texture_shape(column)[:2], texture_stride(column)[:2],
                             col_shape, col_stride),
            col_order, OrderNHWC)

    def index_math(joint: str, padding_store: str) -> str:
        # GLSL that decodes the merged index, derives (h1, w1) and opens the in-bounds branch.
        # `joint` is the whitespace between "(kh *" and the kernel width;
        # `padding_store` is the statement executed for out-of-image positions.
        return (
            "; int n = variable_position_col.x; "
            "int h2 = variable_position_col.y; "
            "int w2 = variable_position_col.z; "
            "int khkwc1 = variable_position_col.w; "
            f"int kh = khkwc1 / {in_c} / {op.KW}; "
            f"int kw = khkwc1 / {in_c} - kh * {op.KW}; "
            f"int c1 = khkwc1 - (kh *{joint}{op.KW} + kw) * {in_c}; "
            f"int h1 = h2 * {op.SH} - {op.PH} + kh * {op.DH}; "
            f"int w1 = w2 * {op.SW} - {op.PW} + kw * {op.DW}; "
            f"if (h1 < 0 || h1 >= {in_h} || w1 < 0 || w1 >= {in_w}) {{ {padding_store} }} else {{ gl_FragColor.r = "
        )

    def fetch(position: str):
        # Texel fetch from the image at an NHWC position expression.
        return texel_fetch(image, change_order(position, OrderNHWC, image.order))

    mode = ChannelMode.get(column)
    if mode == ChannelModeEnum.R:
        fragments = [
            " void main() { ivec4 variable_position_col = ",
            column_position(),
            index_math(" ", "gl_FragColor.r = 0.0;"),
            fetch("vec4(n, h1, w1, c1)"),
            ".r; } } ",
        ]
        kernel_name = "Im2Col_R"

    elif mode == ChannelModeEnum.RGBA:
        fragments = [
            " void main() { ivec4 variable_position_col = ",
            column_position(),
            # The original RGBA literal contains a line break at this point of the
            # expression; it is kept so the emitted shader text is unchanged.
            index_math(" \n", "gl_FragColor = vec4(0.0, 0.0, 0.0, 0.0);"),
        ]
        # Text emitted after each of the four channel fetches.
        trailers = (
            ".r; gl_FragColor.g = ",
            ".r; gl_FragColor.b = ",
            ".r; gl_FragColor.a = ",
            ".r; } } ",
        )
        for offset, trailer in enumerate(trailers):
            fragments.append(fetch(f"vec4(n, h1, w1, c1 + {offset})"))
            fragments.append(trailer)
        kernel_name = "Im2Col_RGBA"

    else:
        raise NotImplementedError

    code = KernelCode(fragments, name=kernel_name)

    kernel = Kernel(code.generate(), code.name, code.samplers, code.uniforms, column)
    return [kernel]