def before(self):
    """Return the declarations emitted ahead of the pooling loop.

    As a side effect, the generated argmax variable names are stored on
    ``self.argmaxs`` (one per dimension, ``self.ndim`` in total).

    Returns:
        str: newline-joined declaration lines.
    """
    # 2D: T maxval = (T)-(1.0/0.0);
    #     int argmax_0 = 0;
    #     int argmax_1 = 0;
    def declare_zero(name):
        return 'int {} = 0;'.format(name)

    self.argmaxs = conv_nd_kernel.vars('argmax', self.ndim)
    header = ['T maxval = (T)-(1.0/0.0);']
    zero_decls = conv_nd_kernel.map_(declare_zero, self.argmaxs)
    return '\n'.join(header + zero_decls)
def before(self):
    """Return the declarations emitted ahead of the pooling loop.

    As a side effect, the generated argmax variable names are stored on
    ``self.argmaxs`` (one per dimension, ``self.ndim`` in total).

    Returns:
        str: newline-joined declaration lines.
    """
    # 2D: T maxval = (T)-(1.0/0.0);
    #     int argmax_0 = 0;
    #     int argmax_1 = 0;
    def declare_zero(name):
        return 'int {} = 0;'.format(name)

    self.argmaxs = conv_nd_kernel.vars('argmax', self.ndim)
    header = ['T maxval = (T)-(1.0/0.0);']
    zero_decls = conv_nd_kernel.map_(declare_zero, self.argmaxs)
    return '\n'.join(header + zero_decls)
def _compile_loop(self, xs):
    """Build the output-index bounds and a generator for the nested loops.

    Args:
        xs: names of the per-dimension input-position variables.

    Returns:
        tuple: ``(bounds, _loop_main)``. ``bounds`` is a list of
        declaration strings (a lower and an upper bound per dimension).
        ``_loop_main(main)`` calls ``main(offset, xs, out_xs)`` to obtain
        the loop body text and returns a one-element list holding the
        text of the complete loop nest.
    """
    # 2D: int out_x0_0 = max(0, (x_0 - k_0 + s_0) / s_0);
    #     int out_x1_0 = min(out_0, (x_0 + s_0) / s_0);
    #     int out_x0_1 = max(0, (x_1 - k_1 + s_1) / s_1);
    #     int out_x1_1 = min(out_1, (x_1 + s_1) / s_1);
    #     ... Before-part here ...
    #     for (int out_x_0 = out_x0_0; out_x_0 < out_x1_0; ++out_x_0) {
    #       int offset_0 = out_1 * (out_x_0 + out_0 * c0);
    #       for (int out_x_1 = out_x0_1; out_x_1 < out_x1_1; ++out_x_1) {
    #         int offset_1 = 1 * (out_x_1 + offset_0);
    #         ... Main-part here ...
    #       }
    #     }
    #     ... After-part here ...
    lower_tmpl = 'int {} = max(0, ({} - {} + {}) / {});'
    upper_tmpl = 'int {} = min({}, ({} + {}) / {});'

    def bound_pair(lower, upper, x, out, k, s):
        return [
            lower_tmpl.format(lower, x, k, s, s),
            upper_tmpl.format(upper, out, x, s, s),
        ]

    out_x0s = conv_nd_kernel.vars('out_x0', self.ndim)
    out_x1s = conv_nd_kernel.vars('out_x1', self.ndim)
    pairs = conv_nd_kernel.map_(
        bound_pair, out_x0s, out_x1s, xs, self.outs, self.ks, self.ss)
    # Flatten the per-dimension [lower, upper] pairs into one list.
    bounds = [decl for pair in pairs for decl in pair]

    def _loop_main(main):
        writer = conv_nd_kernel.Writer()
        loop_tmpl = 'for (int {} = {}; {} < {}; ++{}) {{'
        offset_tmpl = 'int {} = {} * ({} + {});'

        # Open one loop per dimension, each followed by its offset.
        out_xs = conv_nd_kernel.vars('out_x', self.ndim)
        offsets = conv_nd_kernel.vars('offset', self.ndim)
        inner_sizes = self.outs[1:] + [1]
        outer_offsets = ['out_0 * c0'] + offsets[:-1]
        levels = moves.zip(
            out_xs, out_x0s, out_x1s, offsets, outer_offsets, inner_sizes)
        for out_x, lower, upper, offset, outer, size in levels:
            writer.write(
                loop_tmpl.format(out_x, lower, out_x, upper, out_x), 'inc')
            writer.write(offset_tmpl.format(offset, size, out_x, outer))

        # Body supplied by the caller, addressed by the innermost offset.
        body = main(offsets[-1], xs, out_xs)
        for line in body.split('\n'):
            writer.write(line)

        # Close every loop that was opened.
        for _ in out_xs:
            writer.write('}', 'dec')
        return [writer.get()]

    return bounds, _loop_main
def _compile_loop(self, out_xs):
    """Build the input-index bounds and a generator for the nested loops.

    Args:
        out_xs: names of the per-dimension output-position variables.

    Returns:
        tuple: ``(bounds, _loop_main)``. ``bounds`` is a list of
        declaration strings (a lower and an upper bound per dimension).
        ``_loop_main(main)`` calls ``main(offset, xs)`` to obtain the
        loop body text and returns a one-element list holding the text
        of the complete loop nest.
    """
    # 2D: int in_x0_0 = max(0, out_x_0 * s_0 - p_0);
    #     int in_x1_0 = min(d_0, out_x_0 * s_0 + k_0 - p_0);
    #     int in_x0_1 = max(0, out_x_1 * s_1 - p_1);
    #     int in_x1_1 = min(d_1, out_x_1 * s_1 + k_1 - p_1);
    #     ... Before-part here ...
    #     for (int x_0 = in_x0_0; x_0 < in_x1_0; ++x_0) {
    #       int offset_0 = d_1 * (x_0 + d_0 * c0);
    #       for (int x_1 = in_x0_1; x_1 < in_x1_1; ++x_1) {
    #         int offset_1 = 1 * (x_1 + offset_0);
    #         ... Main-part here ...
    #       }
    #     }
    #     ... After-part here ...
    lower_tmpl = 'int {} = max(0, {} * {} - {});'
    upper_tmpl = 'int {} = min({}, {} * {} + {} - {});'

    def bound_pair(lower, upper, d, out, k, s, p):
        return [
            lower_tmpl.format(lower, out, s, p),
            upper_tmpl.format(upper, d, out, s, k, p),
        ]

    in_x0s = conv_nd_kernel.vars('in_x0', self.ndim)
    in_x1s = conv_nd_kernel.vars('in_x1', self.ndim)
    pairs = conv_nd_kernel.map_(
        bound_pair, in_x0s, in_x1s, self.ds, out_xs, self.ks, self.ss,
        self.ps)
    # Flatten the per-dimension [lower, upper] pairs into one list.
    bounds = [decl for pair in pairs for decl in pair]

    def _loop_main(main):
        writer = conv_nd_kernel.Writer()
        loop_tmpl = 'for (int {} = {}; {} < {}; ++{}) {{'
        offset_tmpl = 'int {} = {} * ({} + {});'

        # Open one loop per dimension, each followed by its offset.
        xs = conv_nd_kernel.vars('x', self.ndim)
        offsets = conv_nd_kernel.vars('offset', self.ndim)
        inner_sizes = self.ds[1:] + [1]
        outer_offsets = ['d_0 * c0'] + offsets[:-1]
        levels = moves.zip(
            xs, in_x0s, in_x1s, offsets, outer_offsets, inner_sizes)
        for x, lower, upper, offset, outer, size in levels:
            writer.write(loop_tmpl.format(x, lower, x, upper, x), 'inc')
            writer.write(offset_tmpl.format(offset, size, x, outer))

        # Body supplied by the caller, addressed by the innermost offset.
        body = main(offsets[-1], xs)
        for line in body.split('\n'):
            writer.write(line)

        # Close every loop that was opened.
        for _ in xs:
            writer.write('}', 'dec')
        return [writer.get()]

    return bounds, _loop_main
def _compile_loop(self, out_xs):
    """Build the input-index bounds and a generator for the nested loops.

    Args:
        out_xs: names of the per-dimension output-position variables.

    Returns:
        tuple: ``(bounds, _loop_main)``. ``bounds`` is a list of
        declaration strings (a lower and an upper bound per dimension).
        ``_loop_main(main)`` calls ``main(offset, xs)`` to obtain the
        loop body text and returns a one-element list holding the text
        of the complete loop nest.
    """
    # 2D: int in_x0_0 = max(0, out_x_0 * s_0 - p_0);
    #     int in_x1_0 = min(d_0, out_x_0 * s_0 + k_0 - p_0);
    #     int in_x0_1 = max(0, out_x_1 * s_1 - p_1);
    #     int in_x1_1 = min(d_1, out_x_1 * s_1 + k_1 - p_1);
    #     ... Before-part here ...
    #     for (int x_0 = in_x0_0; x_0 < in_x1_0; ++x_0) {
    #       int offset_0 = d_1 * (x_0 + d_0 * c0);
    #       for (int x_1 = in_x0_1; x_1 < in_x1_1; ++x_1) {
    #         int offset_1 = 1 * (x_1 + offset_0);
    #         ... Main-part here ...
    #       }
    #     }
    #     ... After-part here ...
    lower_tmpl = 'int {} = max(0, {} * {} - {});'
    upper_tmpl = 'int {} = min({}, {} * {} + {} - {});'

    def bound_pair(lower, upper, d, out, k, s, p):
        return [
            lower_tmpl.format(lower, out, s, p),
            upper_tmpl.format(upper, d, out, s, k, p),
        ]

    in_x0s = conv_nd_kernel.vars('in_x0', self.ndim)
    in_x1s = conv_nd_kernel.vars('in_x1', self.ndim)
    pairs = conv_nd_kernel.map_(
        bound_pair, in_x0s, in_x1s, self.ds, out_xs, self.ks, self.ss,
        self.ps)
    # Flatten the per-dimension [lower, upper] pairs into one list.
    bounds = [decl for pair in pairs for decl in pair]

    def _loop_main(main):
        writer = conv_nd_kernel.Writer()
        loop_tmpl = 'for (int {} = {}; {} < {}; ++{}) {{'
        offset_tmpl = 'int {} = {} * ({} + {});'

        # Open one loop per dimension, each followed by its offset.
        xs = conv_nd_kernel.vars('x', self.ndim)
        offsets = conv_nd_kernel.vars('offset', self.ndim)
        inner_sizes = self.ds[1:] + [1]
        outer_offsets = ['d_0 * c0'] + offsets[:-1]
        levels = moves.zip(
            xs, in_x0s, in_x1s, offsets, outer_offsets, inner_sizes)
        for x, lower, upper, offset, outer, size in levels:
            writer.write(loop_tmpl.format(x, lower, x, upper, x), 'inc')
            writer.write(offset_tmpl.format(offset, size, x, outer))

        # Body supplied by the caller, addressed by the innermost offset.
        body = main(offsets[-1], xs)
        for line in body.split('\n'):
            writer.write(line)

        # Close every loop that was opened.
        for _ in xs:
            writer.write('}', 'dec')
        return [writer.get()]

    return bounds, _loop_main
def _compile_loop(self, xs):
    """Build the output-index bounds and a generator for the nested loops.

    Args:
        xs: names of the per-dimension input-position variables.

    Returns:
        tuple: ``(bounds, _loop_main)``. ``bounds`` is a list of
        declaration strings (a lower and an upper bound per dimension).
        ``_loop_main(main)`` calls ``main(offset, xs, out_xs)`` to obtain
        the loop body text and returns a one-element list holding the
        text of the complete loop nest.
    """
    # 2D: int out_x0_0 = max(0, (x_0 - k_0 + s_0) / s_0);
    #     int out_x1_0 = min(out_0, (x_0 + s_0) / s_0);
    #     int out_x0_1 = max(0, (x_1 - k_1 + s_1) / s_1);
    #     int out_x1_1 = min(out_1, (x_1 + s_1) / s_1);
    #     ... Before-part here ...
    #     for (int out_x_0 = out_x0_0; out_x_0 < out_x1_0; ++out_x_0) {
    #       int offset_0 = out_1 * (out_x_0 + out_0 * c0);
    #       for (int out_x_1 = out_x0_1; out_x_1 < out_x1_1; ++out_x_1) {
    #         int offset_1 = 1 * (out_x_1 + offset_0);
    #         ... Main-part here ...
    #       }
    #     }
    #     ... After-part here ...
    lower_tmpl = 'int {} = max(0, ({} - {} + {}) / {});'
    upper_tmpl = 'int {} = min({}, ({} + {}) / {});'

    def bound_pair(lower, upper, x, out, k, s):
        return [
            lower_tmpl.format(lower, x, k, s, s),
            upper_tmpl.format(upper, out, x, s, s),
        ]

    out_x0s = conv_nd_kernel.vars('out_x0', self.ndim)
    out_x1s = conv_nd_kernel.vars('out_x1', self.ndim)
    pairs = conv_nd_kernel.map_(
        bound_pair, out_x0s, out_x1s, xs, self.outs, self.ks, self.ss)
    # Flatten the per-dimension [lower, upper] pairs into one list.
    bounds = [decl for pair in pairs for decl in pair]

    def _loop_main(main):
        writer = conv_nd_kernel.Writer()
        loop_tmpl = 'for (int {} = {}; {} < {}; ++{}) {{'
        offset_tmpl = 'int {} = {} * ({} + {});'

        # Open one loop per dimension, each followed by its offset.
        out_xs = conv_nd_kernel.vars('out_x', self.ndim)
        offsets = conv_nd_kernel.vars('offset', self.ndim)
        inner_sizes = self.outs[1:] + [1]
        outer_offsets = ['out_0 * c0'] + offsets[:-1]
        levels = moves.zip(
            out_xs, out_x0s, out_x1s, offsets, outer_offsets, inner_sizes)
        for out_x, lower, upper, offset, outer, size in levels:
            writer.write(
                loop_tmpl.format(out_x, lower, out_x, upper, out_x), 'inc')
            writer.write(offset_tmpl.format(offset, size, out_x, outer))

        # Body supplied by the caller, addressed by the innermost offset.
        body = main(offsets[-1], xs, out_xs)
        for line in body.split('\n'):
            writer.write(line)

        # Close every loop that was opened.
        for _ in out_xs:
            writer.write('}', 'dec')
        return [writer.get()]

    return bounds, _loop_main
def _compile_out(self):
    """Return the statements that compute the flat offset and read ``in``.

    One ``int offset_<i> = ...;`` declaration is produced per dimension,
    each built from the previous offset (the first one starts from
    ``d_0 * c0``), followed by a final ``out = in[offset_<ndim-1>];``.

    Returns:
        list: the statement strings, in emission order.
    """
    # Presumably, for 2D (assuming vars(p, n) yields p_0 .. p_{n-1}):
    #     int offset_0 = d_1 * (max_0 + d_0 * c0);
    #     int offset_1 = 1 * (max_1 + offset_0);
    #     out = in[offset_1];
    offset_tmpl = 'int {} = {} * ({} + {});'

    def declare_offset(offset, size, position, outer):
        return offset_tmpl.format(offset, size, position, outer)

    inner_sizes = conv_nd_kernel.vars('d', self.ndim)[1:] + [1]
    positions = conv_nd_kernel.vars('max', self.ndim)
    offsets = conv_nd_kernel.vars('offset', self.ndim)
    outer_offsets = ['d_0 * c0'] + offsets[:-1]

    statements = conv_nd_kernel.map_(
        declare_offset, offsets, inner_sizes, positions, outer_offsets)
    last_dim = self.ndim - 1
    statements.append('out = in[offset_{}];'.format(last_dim))
    return statements
def _compile_out(self):
    """Return the statements that compute the flat offset and read ``in``.

    One ``int offset_<i> = ...;`` declaration is produced per dimension,
    each built from the previous offset (the first one starts from
    ``d_0 * c0``), followed by a final ``out = in[offset_<ndim-1>];``.

    Returns:
        list: the statement strings, in emission order.
    """
    # Presumably, for 2D (assuming vars(p, n) yields p_0 .. p_{n-1}):
    #     int offset_0 = d_1 * (max_0 + d_0 * c0);
    #     int offset_1 = 1 * (max_1 + offset_0);
    #     out = in[offset_1];
    offset_tmpl = 'int {} = {} * ({} + {});'

    def declare_offset(offset, size, position, outer):
        return offset_tmpl.format(offset, size, position, outer)

    inner_sizes = conv_nd_kernel.vars('d', self.ndim)[1:] + [1]
    positions = conv_nd_kernel.vars('max', self.ndim)
    offsets = conv_nd_kernel.vars('offset', self.ndim)
    outer_offsets = ['d_0 * c0'] + offsets[:-1]

    statements = conv_nd_kernel.map_(
        declare_offset, offsets, inner_sizes, positions, outer_offsets)
    last_dim = self.ndim - 1
    statements.append('out = in[offset_{}];'.format(last_dim))
    return statements
def after(self, out_xs):
    """Return the statements emitted after the pooling loop.

    Writes ``maxval`` to ``out``, declares one ``argmax_k_<i>`` per
    dimension from ``self.argmaxs``, ``self.ps``, ``out_xs`` and
    ``self.ss``, and combines them into a single ``indexes`` value.

    Args:
        out_xs: names of the per-dimension output-position variables.

    Returns:
        str: newline-joined statements.
    """
    # 2D: out = maxval;
    #     int argmax_k_0 = argmax_0 + p_0 - out_x_0 * s_0;
    #     int argmax_k_1 = argmax_1 + p_1 - out_x_1 * s_1;
    #     indexes = (argmax_k_1 + k_1 * argmax_k_0);
    relative_tmpl = 'int {} = {} + {} - {} * {};'

    def declare_relative(argmax_k, argmax, p, out_x, s):
        return relative_tmpl.format(argmax_k, argmax, p, out_x, s)

    argmax_ks = conv_nd_kernel.vars('argmax_k', self.ndim)
    relative_decls = conv_nd_kernel.map_(
        declare_relative, argmax_ks, self.argmaxs, self.ps, out_xs,
        self.ss)
    flat_index = conv_nd_kernel.muladdexp(
        self.ks[1:], argmax_ks[1:], argmax_ks[0])
    store_index = 'indexes = {};'.format(flat_index)
    return '\n'.join(['out = maxval;'] + relative_decls + [store_index])
def after(self, out_xs):
    """Return the statements emitted after the pooling loop.

    Writes ``maxval`` to ``out``, declares one ``argmax_k_<i>`` per
    dimension from ``self.argmaxs``, ``self.ps``, ``out_xs`` and
    ``self.ss``, and combines them into a single ``indexes`` value.

    Args:
        out_xs: names of the per-dimension output-position variables.

    Returns:
        str: newline-joined statements.
    """
    # 2D: out = maxval;
    #     int argmax_k_0 = argmax_0 + p_0 - out_x_0 * s_0;
    #     int argmax_k_1 = argmax_1 + p_1 - out_x_1 * s_1;
    #     indexes = (argmax_k_1 + k_1 * argmax_k_0);
    relative_tmpl = 'int {} = {} + {} - {} * {};'

    def declare_relative(argmax_k, argmax, p, out_x, s):
        return relative_tmpl.format(argmax_k, argmax, p, out_x, s)

    argmax_ks = conv_nd_kernel.vars('argmax_k', self.ndim)
    relative_decls = conv_nd_kernel.map_(
        declare_relative, argmax_ks, self.argmaxs, self.ps, out_xs,
        self.ss)
    flat_index = conv_nd_kernel.muladdexp(
        self.ks[1:], argmax_ks[1:], argmax_ks[0])
    store_index = 'indexes = {};'.format(flat_index)
    return '\n'.join(['out = maxval;'] + relative_decls + [store_index])
def _in_params(self):
    """Assemble the comma-separated input-parameter declaration string.

    ``self.in_params()`` may return either a list of extra parameter
    declarations, or a tuple whose first item is a list placed right
    after ``raw T in`` and whose second item is a list appended at the
    end. Between them, every name in ``self.ds``, ``self.outs``,
    ``self.ks``, ``self.ss`` and ``self.ps`` is declared as ``int32``.

    Returns:
        str: the parameter declarations joined by ``', '``.
    """
    # 2D: raw T in, int32 d_0, int32 d_1, int32 out_0, int32 out_1,
    #     int32 k_0, int32 k_1, int32 s_0, int32 s_1, int32 p_0,
    #     int32 p_1, ...
    def as_int32(name):
        return 'int32 {}'.format(name)

    extra_params = self.in_params()
    # isinstance() instead of an exact type() comparison, so that tuple
    # subclasses (e.g. namedtuples) are unpacked as well.
    if isinstance(extra_params, tuple):
        raws = extra_params[0]
        extra_params = extra_params[1]
    else:
        raws = []
    # Deliberately not called ``vars`` to avoid shadowing the builtin.
    size_names = self.ds + self.outs + self.ks + self.ss + self.ps
    return ', '.join(
        ['raw T in'] + raws + conv_nd_kernel.map_(as_int32, size_names) +
        extra_params)
def _compile_out_x(self):
    """Declare the per-dimension output positions derived from ``i``.

    Returns:
        tuple: ``(declarations, names)`` where ``declarations`` holds one
        ``int out_x_<i> = ...;`` string per dimension and ``names`` holds
        the declared variable names.
    """
    # 2D: int out_x_0 = i / (out_1) % out_0;
    #     int out_x_1 = i % out_1;
    def declare(out_x, remaining):
        size = remaining[0]
        inner = remaining[1:]
        if not inner:
            # Innermost dimension: no division by inner sizes is needed.
            return 'int {} = i % {};'.format(out_x, size)
        return 'int {} = i / ({}) % {};'.format(
            out_x, conv_nd_kernel.mulexp(inner), size)

    out_xs = conv_nd_kernel.vars('out_x', self.ndim)
    suffixes = conv_nd_kernel.succ_sublists(self.outs)
    declarations = conv_nd_kernel.map_(declare, out_xs, suffixes)
    return declarations, out_xs
def _in_params(self):
    """Assemble the comma-separated input-parameter declaration string.

    ``self.in_params()`` may return either a list of extra parameter
    declarations, or a tuple whose first item is a list placed right
    after ``raw T in`` and whose second item is a list appended at the
    end. Between them, every name in ``self.ds``, ``self.outs``,
    ``self.ks``, ``self.ss`` and ``self.ps`` is declared as ``int32``.

    Returns:
        str: the parameter declarations joined by ``', '``.
    """
    # 2D: raw T in, int32 d_0, int32 d_1, int32 out_0, int32 out_1,
    #     int32 k_0, int32 k_1, int32 s_0, int32 s_1, int32 p_0,
    #     int32 p_1, ...
    def as_int32(name):
        return 'int32 {}'.format(name)

    extra_params = self.in_params()
    # isinstance() instead of an exact type() comparison, so that tuple
    # subclasses (e.g. namedtuples) are unpacked as well.
    if isinstance(extra_params, tuple):
        raws = extra_params[0]
        extra_params = extra_params[1]
    else:
        raws = []
    # Deliberately not called ``vars`` to avoid shadowing the builtin.
    size_names = self.ds + self.outs + self.ks + self.ss + self.ps
    return ', '.join(
        ['raw T in'] + raws + conv_nd_kernel.map_(as_int32, size_names) +
        extra_params)
def _compile_x(self):
    """Declare the per-dimension input positions derived from ``i``.

    Each position is offset by the matching entry of ``self.ps``.

    Returns:
        tuple: ``(declarations, names)`` where ``declarations`` holds one
        ``int x_<i> = ...;`` string per dimension and ``names`` holds the
        declared variable names.
    """
    # 2D: int x_0 = i / (d_1) % d_0 + p_0;
    #     int x_1 = i % d_1 + p_1;
    def declare(x, remaining, p):
        size = remaining[0]
        inner = remaining[1:]
        if not inner:
            # Innermost dimension: no division by inner sizes is needed.
            return 'int {} = i % {} + {};'.format(x, size, p)
        return 'int {} = i / ({}) % {} + {};'.format(
            x, conv_nd_kernel.mulexp(inner), size, p)

    xs = conv_nd_kernel.vars('x', self.ndim)
    suffixes = conv_nd_kernel.succ_sublists(self.ds)
    declarations = conv_nd_kernel.map_(declare, xs, suffixes, self.ps)
    return declarations, xs
def _compile_out_x(self):
    """Declare the per-dimension output positions derived from ``i``.

    Returns:
        tuple: ``(declarations, names)`` where ``declarations`` holds one
        ``int out_x_<i> = ...;`` string per dimension and ``names`` holds
        the declared variable names.
    """
    # 2D: int out_x_0 = i / (out_1) % out_0;
    #     int out_x_1 = i % out_1;
    def declare(out_x, remaining):
        size = remaining[0]
        inner = remaining[1:]
        if not inner:
            # Innermost dimension: no division by inner sizes is needed.
            return 'int {} = i % {};'.format(out_x, size)
        return 'int {} = i / ({}) % {};'.format(
            out_x, conv_nd_kernel.mulexp(inner), size)

    out_xs = conv_nd_kernel.vars('out_x', self.ndim)
    suffixes = conv_nd_kernel.succ_sublists(self.outs)
    declarations = conv_nd_kernel.map_(declare, out_xs, suffixes)
    return declarations, out_xs
def _compile_x(self):
    """Declare the per-dimension input positions derived from ``i``.

    Each position is offset by the matching entry of ``self.ps``.

    Returns:
        tuple: ``(declarations, names)`` where ``declarations`` holds one
        ``int x_<i> = ...;`` string per dimension and ``names`` holds the
        declared variable names.
    """
    # 2D: int x_0 = i / (d_1) % d_0 + p_0;
    #     int x_1 = i % d_1 + p_1;
    def declare(x, remaining, p):
        size = remaining[0]
        inner = remaining[1:]
        if not inner:
            # Innermost dimension: no division by inner sizes is needed.
            return 'int {} = i % {} + {};'.format(x, size, p)
        return 'int {} = i / ({}) % {} + {};'.format(
            x, conv_nd_kernel.mulexp(inner), size, p)

    xs = conv_nd_kernel.vars('x', self.ndim)
    suffixes = conv_nd_kernel.succ_sublists(self.ds)
    declarations = conv_nd_kernel.map_(declare, xs, suffixes, self.ps)
    return declarations, xs
def after(self, out_xs):
    """Return the statements that read ``ggx`` at the argmax position.

    One ``int offset_<i> = ...;`` declaration is produced per dimension,
    followed by ``out = ggx[offset_<ndim-1>];``.

    Args:
        out_xs: accepted for interface compatibility; not used here.

    Returns:
        str: newline-joined statements.
    """
    # 2D: int offset_0 = d_1 * (argmax_0 + d_0 * c0);
    #     int offset_1 = 1 * (argmax_1 + offset_0);
    #     out = ggx[offset_1];
    offset_tmpl = 'int {} = {} * ({} + {});'

    def declare_offset(offset, size, position, outer):
        return offset_tmpl.format(offset, size, position, outer)

    inner_sizes = conv_nd_kernel.vars('d', self.ndim)[1:] + [1]
    positions = conv_nd_kernel.vars('argmax', self.ndim)
    offsets = conv_nd_kernel.vars('offset', self.ndim)
    outer_offsets = ['d_0 * c0'] + offsets[:-1]

    statements = conv_nd_kernel.map_(
        declare_offset, offsets, inner_sizes, positions, outer_offsets)
    last_dim = self.ndim - 1
    statements.append('out = ggx[offset_{}];'.format(last_dim))
    return '\n'.join(statements)
def main(self, offset, xs, out_xs):
    """Return the loop body that accumulates ``gy`` where indexes match.

    Args:
        offset: name of the variable indexing ``indexes`` and ``gy``.
        xs: names of the per-dimension input-position variables.
        out_xs: names of the per-dimension output-position variables.

    Returns:
        str: the text collected by the writer.
    """
    # 2D: int kx = (x_1 - out_x_1 * s_1 + k_1 *
    #             (x_0 - out_x_0 * s_0 + k_0 * 0));
    #     if (indexes[offset_1] == kx) {
    #       val = val + gy[offset_1];
    #     }
    def relative(x, out_x, s):
        return '{} - {} * {}'.format(x, out_x, s)

    writer = conv_nd_kernel.Writer()

    relative_terms = conv_nd_kernel.map_(relative, xs, out_xs, self.ss)
    kx_expr = conv_nd_kernel.muladdexp(self.ks, relative_terms, '0')
    writer.write('int kx = {};'.format(kx_expr))

    writer.write('if (indexes[{}] == kx) {{'.format(offset), 'inc')
    writer.write('val = val + gy[{}];'.format(offset))
    writer.write('}', 'dec')
    return writer.get()
def main(self, offset, xs, out_xs):
    """Return the loop body that accumulates ``gy`` where indexes match.

    Args:
        offset: name of the variable indexing ``indexes`` and ``gy``.
        xs: names of the per-dimension input-position variables.
        out_xs: names of the per-dimension output-position variables.

    Returns:
        str: the text collected by the writer.
    """
    # 2D: int kx = (x_1 - out_x_1 * s_1 + k_1 *
    #             (x_0 - out_x_0 * s_0 + k_0 * 0));
    #     if (indexes[offset_1] == kx) {
    #       val = val + gy[offset_1];
    #     }
    def relative(x, out_x, s):
        return '{} - {} * {}'.format(x, out_x, s)

    writer = conv_nd_kernel.Writer()

    relative_terms = conv_nd_kernel.map_(relative, xs, out_xs, self.ss)
    kx_expr = conv_nd_kernel.muladdexp(self.ks, relative_terms, '0')
    writer.write('int kx = {};'.format(kx_expr))

    writer.write('if (indexes[{}] == kx) {{'.format(offset), 'inc')
    writer.write('val = val + gy[{}];'.format(offset))
    writer.write('}', 'dec')
    return writer.get()
def after(self, out_xs):
    """Return the statements that read ``ggx`` at the argmax position.

    One ``int offset_<i> = ...;`` declaration is produced per dimension,
    followed by ``out = ggx[offset_<ndim-1>];``.

    Args:
        out_xs: accepted for interface compatibility; not used here.

    Returns:
        str: newline-joined statements.
    """
    # 2D: int offset_0 = d_1 * (argmax_0 + d_0 * c0);
    #     int offset_1 = 1 * (argmax_1 + offset_0);
    #     out = ggx[offset_1];
    offset_tmpl = 'int {} = {} * ({} + {});'

    def declare_offset(offset, size, position, outer):
        return offset_tmpl.format(offset, size, position, outer)

    inner_sizes = conv_nd_kernel.vars('d', self.ndim)[1:] + [1]
    positions = conv_nd_kernel.vars('argmax', self.ndim)
    offsets = conv_nd_kernel.vars('offset', self.ndim)
    outer_offsets = ['d_0 * c0'] + offsets[:-1]

    statements = conv_nd_kernel.map_(
        declare_offset, offsets, inner_sizes, positions, outer_offsets)
    last_dim = self.ndim - 1
    statements.append('out = ggx[offset_{}];'.format(last_dim))
    return '\n'.join(statements)
def _compile_max_x(self):
    """Declare the per-dimension ``max_<i>`` positions derived from ``index``.

    Each declaration combines the output position, stride and padding of
    its dimension with the component of ``index`` for that dimension,
    clamped from below with ``max(0, ...)``.

    Returns:
        list: one declaration string per dimension.
    """
    # Presumably, for 2D (assuming vars(p, n) yields p_0 .. p_{n-1}):
    #     int max_0 = max(0, out_x_0 * s_0 - p_0 + index / (k_1) % k_0);
    #     int max_1 = max(0, out_x_1 * s_1 - p_1 + index % k_1);
    def declare(max_val, out_val, stride_val, pad_val, remaining_ks):
        ksize = remaining_ks[0]
        inner_ks = remaining_ks[1:]
        if not inner_ks:
            # Innermost dimension: no division by inner sizes is needed.
            return 'int {} = max(0, {} * {} - {} + index % {});'.format(
                max_val, out_val, stride_val, pad_val, ksize)
        command = 'int {} = max(0, {} * {} - {} + index / ({}) % {});'
        return command.format(
            max_val, out_val, stride_val, pad_val,
            conv_nd_kernel.mulexp(inner_ks), ksize)

    ndim = self.ndim
    max_vals = conv_nd_kernel.vars('max', ndim)
    out_vals = conv_nd_kernel.vars('out_x', ndim)
    stride_vals = conv_nd_kernel.vars('s', ndim)
    pad_vals = conv_nd_kernel.vars('p', ndim)
    ksize_vals = conv_nd_kernel.vars('k', ndim)
    return conv_nd_kernel.map_(
        declare, max_vals, out_vals, stride_vals, pad_vals,
        conv_nd_kernel.succ_sublists(ksize_vals))
def _compile_max_x(self):
    """Declare the per-dimension ``max_<i>`` positions derived from ``index``.

    Each declaration combines the output position, stride and padding of
    its dimension with the component of ``index`` for that dimension,
    clamped from below with ``max(0, ...)``.

    Returns:
        list: one declaration string per dimension.
    """
    # Presumably, for 2D (assuming vars(p, n) yields p_0 .. p_{n-1}):
    #     int max_0 = max(0, out_x_0 * s_0 - p_0 + index / (k_1) % k_0);
    #     int max_1 = max(0, out_x_1 * s_1 - p_1 + index % k_1);
    def declare(max_val, out_val, stride_val, pad_val, remaining_ks):
        ksize = remaining_ks[0]
        inner_ks = remaining_ks[1:]
        if not inner_ks:
            # Innermost dimension: no division by inner sizes is needed.
            return 'int {} = max(0, {} * {} - {} + index % {});'.format(
                max_val, out_val, stride_val, pad_val, ksize)
        command = 'int {} = max(0, {} * {} - {} + index / ({}) % {});'
        return command.format(
            max_val, out_val, stride_val, pad_val,
            conv_nd_kernel.mulexp(inner_ks), ksize)

    ndim = self.ndim
    max_vals = conv_nd_kernel.vars('max', ndim)
    out_vals = conv_nd_kernel.vars('out_x', ndim)
    stride_vals = conv_nd_kernel.vars('s', ndim)
    pad_vals = conv_nd_kernel.vars('p', ndim)
    ksize_vals = conv_nd_kernel.vars('k', ndim)
    return conv_nd_kernel.map_(
        declare, max_vals, out_vals, stride_vals, pad_vals,
        conv_nd_kernel.succ_sublists(ksize_vals))