def depth2space(op: Depth2Space) -> List[Kernel]:
    """Build the fragment-shader kernel for a Depth2Space operator.

    For every output element (n, h2, w2, c2) the shader reads the input at
    (n, h2 / r, w2 / r, c1), where c1 folds the sub-block offsets of h2 and
    w2 into the channel index.

    Args:
        op: operator providing ``inputs["x"]``, ``outputs["y"]`` and
            ``parameters['r']``.

    Returns:
        A single-element list holding the generated kernel.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]
    block = op.parameters['r']
    out_channels = dst.shape_dict[Axis.C]

    # Both variables must use the N/H/W/C axes and store one value in the R channel.
    assert src.order.check_same_axes(OrderNHWC)
    assert dst.order.check_same_axes(OrderNHWC)
    assert ChannelMode.get(src) == ChannelModeEnum.R
    assert ChannelMode.get(dst) == ChannelModeEnum.R

    prologue = " void main() { ivec4 variable_position_y = "
    out_position = change_order(get_output_position(dst), dst.order, OrderNHWC)
    # Adjacent literals are concatenated into one GLSL fragment.
    index_math = (
        "; int n = variable_position_y.x;"
        " int h2 = variable_position_y.y;"
        " int w2 = variable_position_y.z;"
        " int c2 = variable_position_y.w;"
        f" int h1 = h2 / {block};"
        f" int w1 = w2 / {block};"
        f" int c1 = c2 + (w2-w1*{block})*{out_channels} + (h2-h1*{block})*{out_channels}*{block};"
        " gl_FragColor.r = "
    )
    fetch = texel_fetch(
        src, change_order("vec4(n, h1, w1, c1)", OrderNHWC, src.order))
    epilogue = ".r; } "

    kernel_code = KernelCode(
        [prologue, out_position, index_math, fetch, epilogue],
        name=op.__class__.__name__)
    shader = kernel_code.generate()
    return [
        Kernel(shader, kernel_code.name, kernel_code.samplers,
               kernel_code.uniforms, dst)
    ]
def col2im(op: Col2Im) -> List[Kernel]:
    """Build the fragment-shader kernel for a Col2Im operator.

    Each output pixel (n, h1, w1, c1) accumulates the column entries whose
    kernel offset (kh, kw) and stride-aligned position (h2, w2) map onto it.
    The KH, KW and C axes of ``col`` are treated as one merged trailing axis.

    Args:
        op: operator providing ``inputs["col"]``, ``outputs["im"]`` and the
            kernel/stride/padding attributes (KH, KW, SH, SW, PH, PW).

    Returns:
        A single-element list holding the generated kernel.
    """
    columns = op.inputs["col"]
    image = op.outputs["im"]

    assert columns.order.check_same_axes(
        Order([Axis.N, Axis.H, Axis.W, Axis.KH, Axis.KW, Axis.C]))
    # KH, KW and C must be the last three axes, in that order (positions 3, 4, 5).
    axis_index = columns.order.axes_dict
    assert axis_index[Axis.KH] + 2 == axis_index[Axis.KW] + 1 == axis_index[Axis.C] == 5
    assert image.order.check_same_axes(OrderNHWC)
    assert ChannelMode.get(columns) == ChannelModeEnum.R
    assert ChannelMode.get(image) == ChannelModeEnum.R

    # View col as 4-D by merging its three trailing axes into one.
    flat_shape = columns.shape[:3] + (mul(columns.shape[3:6]),)
    flat_stride = [mul(flat_shape[k:]) for k in range(1, len(flat_shape) + 1)]
    flat_order = Order(columns.order.axes[:3] + (Axis.C,))

    prologue = " void main() { ivec4 variable_position_im = "
    out_position = change_order(get_output_position(image), image.order, OrderNHWC)
    # Adjacent literals are concatenated into one GLSL fragment.
    accumulate = (
        "; int n = variable_position_im.x;"
        " int h1 = variable_position_im.y;"
        " int w1 = variable_position_im.z;"
        " int c1 = variable_position_im.w;"
        " float sum = 0.0;"
        f" for (int kh = 0; kh < {op.KH}; kh++) {{"
        f" int h2 = (h1 + {op.PH} - kh) / {op.SH};"
        f" if (mod(h1 + {op.PH} - kh, {op.SH}) != 0 || h2 < 0 || h2 >= {columns.shape_dict[Axis.H]}) continue;"
        f" for (int kw = 0; kw < {op.KW}; kw++) {{"
        f" int w2 = (w1 + {op.PW} - kw) / {op.SW};"
        f" if (mod(w1 + {op.PW} - kw, {op.SW}) != 0 || w2 < 0 || w2 >= {columns.shape_dict[Axis.W]}) continue;"
        f" int khkwc1 = (kh * {op.KW} + kw) * {image.shape_dict[Axis.C]} + c1;"
        " sum += texture2D("
    )
    texture_coord = convert_coord(
        change_order("vec4(n, h2, w2, khkwc1)", OrderNHWC, flat_order),
        flat_shape,
        flat_stride,
        texture_shape(columns)[:2][::-1],
        texture_stride(columns)[:2][::-1])
    epilogue = ").r; } } gl_FragColor.r = sum; } "

    kernel_code = KernelCode(
        [prologue, out_position, accumulate, columns, ",", texture_coord, epilogue],
        name=op.__class__.__name__)
    shader = kernel_code.generate()
    return [
        Kernel(shader, kernel_code.name, kernel_code.samplers,
               kernel_code.uniforms, image)
    ]
def average_pooling_2d(op: AveragePooling2D) -> List[Kernel]:
    """Build the fragment-shader kernel for an AveragePooling2D operator.

    The shader sums the input values inside the pooling window, skipping
    positions outside the input, and divides by either the number of values
    actually summed (``divide_without_padding``) or the full window size.

    Args:
        op: operator providing ``inputs["x"]``, ``outputs["y"]``,
            ``parameters["divide_without_padding"]``, ``ksize`` and the
            kernel/stride/padding attributes (KH, KW, SH, SW, PH, PW).

    Returns:
        A single-element list holding the generated kernel.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    assert src.order.check_same_axes(OrderNHWC)
    assert dst.order.check_same_axes(OrderNHWC)
    assert ChannelMode.get(src) == ChannelModeEnum.R
    assert ChannelMode.get(dst) == ChannelModeEnum.R

    if op.parameters["divide_without_padding"]:
        # Count the summed elements in the shader; the 1e-8 seed keeps the
        # divisor non-zero when no element is summed.
        count_decl, count_step, denominator = (
            "float divider = 1e-8;", "divider += 1.0;", "divider")
    else:
        # Fixed divisor: the full window size, embedded as a float literal.
        count_decl = count_step = ""
        denominator = str(float(op.ksize[0] * op.ksize[1]))

    prologue = " void main() { ivec4 variable_position_y = "
    out_position = change_order(get_output_position(dst), dst.order, OrderNHWC)
    # Adjacent literals are concatenated into one GLSL fragment.
    window_loop = (
        "; int n = variable_position_y.x;"
        " int h2 = variable_position_y.y;"
        " int w2 = variable_position_y.z;"
        " int c = variable_position_y.w;"
        " float sum = 0.0;"
        f" {count_decl}"
        f" for (int kh = 0; kh < {op.KH}; kh++) {{"
        f" int h1 = h2 * {op.SH} - {op.PH} + kh;"
        f" if (h1 < 0 || h1 >= {src.shape_dict[Axis.H]}) continue;"
        f" for (int kw = 0; kw < {op.KW}; kw++) {{"
        f" int w1 = w2 * {op.SW} - {op.PW} + kw;"
        f" if (w1 < 0 || w1 >= {src.shape_dict[Axis.W]}) continue;"
        " sum += "
    )
    fetch = texel_fetch(
        src, change_order("vec4(n, h1, w1, c)", OrderNHWC, src.order))
    epilogue = (
        f".r; {count_step} }} }}"
        f" gl_FragColor.r = sum / {denominator}; }} "
    )

    kernel_code = KernelCode(
        [prologue, out_position, window_loop, fetch, epilogue],
        name=op.__class__.__name__)
    shader = kernel_code.generate()
    return [
        Kernel(shader, kernel_code.name, kernel_code.samplers,
               kernel_code.uniforms, dst)
    ]
def max_pooling_2d(op: MaxPooling2D) -> List[Kernel]:
    """Build the fragment-shader kernel for a MaxPooling2D operator.

    The shader scans the pooling window of each output element, skips
    positions that fall outside the input, and writes the largest value seen.

    Args:
        op: operator providing ``inputs["x"]``, ``outputs["y"]`` and the
            kernel/stride/padding attributes (KH, KW, SH, SW, PH, PW).

    Returns:
        A single-element list holding the generated kernel.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    assert src.order.check_same_axes(OrderNHWC)
    assert dst.order.check_same_axes(OrderNHWC)
    assert ChannelMode.get(src) == ChannelModeEnum.R
    assert ChannelMode.get(dst) == ChannelModeEnum.R

    prologue = " void main() { ivec4 variable_position_y = "
    out_position = change_order(get_output_position(dst), dst.order, OrderNHWC)
    # NOTE(review): the running maximum starts at -1e5, so inputs below that
    # value (and windows with no in-bounds element) produce -1e5 -- confirm
    # this is the intended behavior.
    # Adjacent literals are concatenated into one GLSL fragment.
    window_loop = (
        "; int n = variable_position_y.x;"
        " int h2 = variable_position_y.y;"
        " int w2 = variable_position_y.z;"
        " int c = variable_position_y.w;"
        " float v = -1e5;"
        f" for (int kh = 0; kh < {op.KH}; kh++) {{"
        f" int h1 = h2 * {op.SH} - {op.PH} + kh;"
        f" if (h1 < 0 || h1 >= {src.shape_dict[Axis.H]}) continue;"
        f" for (int kw = 0; kw < {op.KW}; kw++) {{"
        f" int w1 = w2 * {op.SW} - {op.PW} + kw;"
        f" if (w1 < 0 || w1 >= {src.shape_dict[Axis.W]}) continue;"
        " v = max("
    )
    fetch = texel_fetch(
        src, change_order("vec4(n, h1, w1, c)", OrderNHWC, src.order))
    epilogue = ".r, v); } } gl_FragColor.r = v; } "

    kernel_code = KernelCode(
        [prologue, out_position, window_loop, fetch, epilogue],
        name=op.__class__.__name__)
    shader = kernel_code.generate()
    return [
        Kernel(shader, kernel_code.name, kernel_code.samplers,
               kernel_code.uniforms, dst)
    ]
def average_pooling_2d(op: Unpooling2D) -> List[Kernel]:
    """Build the fragment-shader kernel for an Unpooling2D operator.

    For each output element the shader walks the kernel offsets (kh, kw),
    keeps those whose shifted position is in range and divisible by the
    stride, and sums the input values at the resulting positions.

    NOTE(review): despite its name this function takes an Unpooling2D
    operator and shares its name with the AveragePooling2D generator; if
    both live in the same module the later definition shadows the earlier
    one unless handlers are registered by other means -- confirm.
    NOTE(review): unlike the sibling generators, no ChannelMode checks are
    made here even though only the R channel is read and written -- confirm.

    Args:
        op: operator providing ``inputs["x"]``, ``outputs["y"]`` and the
            kernel/stride/padding attributes (KH, KW, SH, SW, PH, PW).

    Returns:
        A single-element list holding the generated kernel.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    assert src.order.check_same_axes(OrderNHWC)
    assert dst.order.check_same_axes(OrderNHWC)

    prologue = " void main() { ivec4 variable_position_y = "
    out_position = change_order(get_output_position(dst), dst.order, OrderNHWC)
    # Adjacent literals are concatenated into one GLSL fragment.
    scatter_loop = (
        "; int n = variable_position_y.x;"
        " int h2 = variable_position_y.y;"
        " int w2 = variable_position_y.z;"
        " int c = variable_position_y.w;"
        " float sum = 0.0;"
        f" for (int kh = 0; kh < {op.KH}; kh++) {{"
        f" int h1 = h2 + {op.PH} - kh;"
        f" if (h1 < 0 || h1 >= {src.shape_dict[Axis.H]} * {op.SH}) continue;"
        f" if (mod(h1, {op.SH}) != 0) continue;"
        f" h1 /= {op.SH};"
        f" for (int kw = 0; kw < {op.KW}; kw++) {{"
        f" int w1 = w2 + {op.PW} - kw;"
        f" if (w1 < 0 || w1 >= {src.shape_dict[Axis.W]} * {op.SW}) continue;"
        f" if (mod(w1, {op.SW}) != 0) continue;"
        f" w1 /= {op.SW};"
        " sum += "
    )
    fetch = texel_fetch(
        src, change_order("vec4(n, h1, w1, c)", OrderNHWC, src.order))
    epilogue = ".r; } } gl_FragColor.r = sum; } "

    kernel_code = KernelCode(
        [prologue, out_position, scatter_loop, fetch, epilogue],
        name=op.__class__.__name__)
    shader = kernel_code.generate()
    return [
        Kernel(shader, kernel_code.name, kernel_code.samplers,
               kernel_code.uniforms, dst)
    ]
def space2depth(op: Space2Depth) -> List[Kernel]:
    """Build the fragment-shader kernel for a Space2Depth operator.

    For every output element (n, h2, w2, c2) the shader splits c2 into an
    input channel (c2 mod C1) and a block offset (c2 / C1), which selects the
    row and column inside the r-by-r input block starting at (h2*r, w2*r).
    Positions are read and written through each variable's own axis order.

    Args:
        op: operator providing ``inputs["x"]``, ``outputs["y"]`` and
            ``parameters['r']``.

    Returns:
        A single-element list holding the generated kernel.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]
    block = op.parameters['r']
    in_channels = src.shape_dict[Axis.C]

    assert src.order.check_same_axes(OrderNHWC)
    assert dst.order.check_same_axes(OrderNHWC)
    assert ChannelMode.get(src) == ChannelModeEnum.R
    assert ChannelMode.get(dst) == ChannelModeEnum.R

    prologue = " void main() { ivec4 variable_position_y = "
    out_position = get_output_position(dst)

    # Component index of each axis inside the respective position vector.
    dst_axis = dst.order.axes_dict
    src_axis = src.order.axes_dict
    # Adjacent literals are concatenated into one GLSL fragment.
    index_math = (
        f"; int n = variable_position_y[{dst_axis[Axis.N]}];"
        f" int h2 = variable_position_y[{dst_axis[Axis.H]}];"
        f" int w2 = variable_position_y[{dst_axis[Axis.W]}];"
        f" int c2 = variable_position_y[{dst_axis[Axis.C]}];"
        f" int c1 = mod(c2, {in_channels});"
        f" int h1 = h2 * {block} + c2 / {in_channels} / {block};"
        f" int w1 = w2 * {block} + mod(c2 / {in_channels}, {block});"
        " ivec4 variable_position_x;"
        f" variable_position_x[{src_axis[Axis.N]}] = n;"
        f" variable_position_x[{src_axis[Axis.H]}] = h1;"
        f" variable_position_x[{src_axis[Axis.W]}] = w1;"
        f" variable_position_x[{src_axis[Axis.C]}] = c1;"
        " gl_FragColor.r = "
    )
    fetch = texel_fetch(src, "variable_position_x")
    epilogue = ".r; } "

    kernel_code = KernelCode(
        [prologue, out_position, index_math, fetch, epilogue],
        name=op.__class__.__name__)
    shader = kernel_code.generate()
    return [
        Kernel(shader, kernel_code.name, kernel_code.samplers,
               kernel_code.uniforms, dst)
    ]
def reshape(op: Tile) -> List[Kernel]:
    """Build the fragment-shader kernel for a Tile operator.

    Each output position is wrapped into the input's extent with ``mod``
    (axis by axis, in the output's axis order), converted to the input's
    axis order and used to fetch the value to write.

    NOTE(review): the function is named ``reshape`` although it takes a Tile
    operator -- confirm the name is intentional.

    Args:
        op: operator providing ``inputs["x"]`` and ``outputs["y"]``.

    Returns:
        A single-element list holding the generated kernel.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    # y -{broadcast}-> x_position_in_y_order -{change_order}-> x
    prologue = " void main() { gl_FragColor.r = "
    wrapped_position = ExpressionNode([
        "mod(",
        get_output_position(dst),
        ", ",
        ivec([src.shape_dict[axis] for axis in dst.order.axes]),
        ")",
    ])
    fetch = texel_fetch(src, change_order(wrapped_position, dst.order, src.order))
    epilogue = ".r; } "

    kernel_code = KernelCode([prologue, fetch, epilogue],
                             name=op.__class__.__name__)
    shader = kernel_code.generate()
    return [
        Kernel(shader, kernel_code.name, kernel_code.samplers,
               kernel_code.uniforms, dst)
    ]
def reinterpret_axis(op: ReinterpretAxis) -> List[Kernel]:
    """Build the fragment-shader kernel for a ReinterpretAxis operator.

    The shader copies each value from the input to the output. The output
    position is reordered into an order built by replacing every axis of
    ``x.order`` with the axis at the same index in ``op.out_order``.

    Args:
        op: operator providing ``inputs["x"]``, ``outputs["y"]``,
            ``in_order`` and ``out_order``.

    Returns:
        A single-element list holding the generated kernel.
    """
    src = op.inputs["x"]
    dst = op.outputs["y"]

    # For each axis of x (in x's order), the out_order axis at the same
    # index that the axis has in in_order.
    renamed_axes = []
    for axis in src.order.axes:
        renamed_axes.append(op.out_order.axes[op.in_order.axes_dict[axis]])
    y_axes_order_in_x_order = Order(renamed_axes)

    # FIXME: optimize
    prologue = " void main() { gl_FragColor.r = "
    fetch = texel_fetch(
        src,
        change_order(get_output_position(dst), dst.order, y_axes_order_in_x_order))
    epilogue = ".r; } "

    kernel_code = KernelCode([prologue, fetch, epilogue],
                             name=op.__class__.__name__)
    shader = kernel_code.generate()
    return [
        Kernel(shader, kernel_code.name, kernel_code.samplers,
               kernel_code.uniforms, dst)
    ]